stdlib_lapack_eigv_sym_comp.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_sym_comp
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_ssygst( itype, uplo, n, a, lda, b, ldb, info )
     !! SSYGST reduces a real symmetric-definite generalized eigenproblem
     !! to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     !! B must have been previously factorized as U**T*U or L*L**T by SPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: b(ldb,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k, kb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SSYGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SSYGST', uplo, n, -1_${ik}$, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=n ) then
              ! use unblocked code
              call stdlib${ii}$_ssygs2( itype, uplo, n, a, lda, b, ldb, info )
           else
              ! use blocked code
              if( itype==1_${ik}$ ) then
                 if( upper ) then
                    ! compute inv(u**t)*a*inv(u)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib${ii}$_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_strsm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT',kb, n-k-kb+1, &
                                    one, b( k, k ), ldb,a( k, k+kb ), lda )
                          call stdlib${ii}$_ssymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( &
                                    k, k+kb ), ldb, one,a( k, k+kb ), lda )
                          call stdlib${ii}$_ssyr2k( uplo, 'TRANSPOSE', n-k-kb+1, kb, -one,a( k, k+kb ), &
                                    lda, b( k, k+kb ), ldb,one, a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_ssymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( &
                                    k, k+kb ), ldb, one,a( k, k+kb ), lda )
                          call stdlib${ii}$_strsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+&
                                    1_${ik}$, one,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda )
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**t)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib${ii}$_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_strsm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',n-k-kb+1, kb, &
                                    one, b( k, k ), ldb,a( k+kb, k ), lda )
                          call stdlib${ii}$_ssymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(&
                                     k+kb, k ), ldb, one,a( k+kb, k ), lda )
                          call stdlib${ii}$_ssyr2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-one, a( k+kb, k &
                                    ), lda, b( k+kb, k ),ldb, one, a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_ssymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(&
                                     k+kb, k ), ldb, one,a( k+kb, k ), lda )
                          call stdlib${ii}$_strsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, &
                                    kb, one,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda )
                       end if
                    end do
                 end if
              else
                 if( upper ) then
                    ! compute u*a*u**t
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_strmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, one, &
                                 b, ldb, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_ssymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1_${ik}$, k ), &
                                 ldb, one, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_ssyr2k( uplo, 'NO TRANSPOSE', k-1, kb, one,a( 1_${ik}$, k ), lda, b( &
                                 1_${ik}$, k ), ldb, one, a,lda )
                       call stdlib${ii}$_ssymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1_${ik}$, k ), &
                                 ldb, one, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_strmm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',k-1, kb, one, b( &
                                 k, k ), ldb, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 else
                    ! compute l**t*a*l
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_strmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, one, &
                                 b, ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_ssymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, one, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_ssyr2k( uplo, 'TRANSPOSE', k-1, kb, one,a( k, 1_${ik}$ ), lda, b( k, &
                                 1_${ik}$ ), ldb, one, a,lda )
                       call stdlib${ii}$_ssymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, one, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_strmm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT', kb,k-1, one, b( &
                                 k, k ), ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_ssygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 end if
              end if
           end if
           return
     end subroutine stdlib${ii}$_ssygst

     pure module subroutine stdlib${ii}$_dsygst( itype, uplo, n, a, lda, b, ldb, info )
     !! DSYGST reduces a real symmetric-definite generalized eigenproblem
     !! to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     !! B must have been previously factorized as U**T*U or L*L**T by DPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: b(ldb,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k, kb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSYGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DSYGST', uplo, n, -1_${ik}$, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=n ) then
              ! use unblocked code
              call stdlib${ii}$_dsygs2( itype, uplo, n, a, lda, b, ldb, info )
           else
              ! use blocked code
              if( itype==1_${ik}$ ) then
                 if( upper ) then
                    ! compute inv(u**t)*a*inv(u)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib${ii}$_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_dtrsm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT',kb, n-k-kb+1, &
                                    one, b( k, k ), ldb,a( k, k+kb ), lda )
                          call stdlib${ii}$_dsymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( &
                                    k, k+kb ), ldb, one,a( k, k+kb ), lda )
                          call stdlib${ii}$_dsyr2k( uplo, 'TRANSPOSE', n-k-kb+1, kb, -one,a( k, k+kb ), &
                                    lda, b( k, k+kb ), ldb,one, a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_dsymm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( &
                                    k, k+kb ), ldb, one,a( k, k+kb ), lda )
                          call stdlib${ii}$_dtrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+&
                                    1_${ik}$, one,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda )
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**t)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib${ii}$_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_dtrsm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',n-k-kb+1, kb, &
                                    one, b( k, k ), ldb,a( k+kb, k ), lda )
                          call stdlib${ii}$_dsymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(&
                                     k+kb, k ), ldb, one,a( k+kb, k ), lda )
                          call stdlib${ii}$_dsyr2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-one, a( k+kb, k &
                                    ), lda, b( k+kb, k ),ldb, one, a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_dsymm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(&
                                     k+kb, k ), ldb, one,a( k+kb, k ), lda )
                          call stdlib${ii}$_dtrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, &
                                    kb, one,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda )
                       end if
                    end do
                 end if
              else
                 if( upper ) then
                    ! compute u*a*u**t
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_dtrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, one, &
                                 b, ldb, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_dsymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1_${ik}$, k ), &
                                 ldb, one, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_dsyr2k( uplo, 'NO TRANSPOSE', k-1, kb, one,a( 1_${ik}$, k ), lda, b( &
                                 1_${ik}$, k ), ldb, one, a,lda )
                       call stdlib${ii}$_dsymm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1_${ik}$, k ), &
                                 ldb, one, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_dtrmm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',k-1, kb, one, b( &
                                 k, k ), ldb, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 else
                    ! compute l**t*a*l
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_dtrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, one, &
                                 b, ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_dsymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, one, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_dsyr2k( uplo, 'TRANSPOSE', k-1, kb, one,a( k, 1_${ik}$ ), lda, b( k, &
                                 1_${ik}$ ), ldb, one, a,lda )
                       call stdlib${ii}$_dsymm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, one, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_dtrmm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT', kb,k-1, one, b( &
                                 k, k ), ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_dsygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 end if
              end if
           end if
           return
     end subroutine stdlib${ii}$_dsygst

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$sygst( itype, uplo, n, a, lda, b, ldb, info )
     !! DSYGST: reduces a real symmetric-definite generalized eigenproblem
     !! to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     !! B must have been previously factorized as U**T*U or L*L**T by DPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: b(ldb,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k, kb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSYGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DSYGST', uplo, n, -1_${ik}$, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=n ) then
              ! use unblocked code
              call stdlib${ii}$_${ri}$sygs2( itype, uplo, n, a, lda, b, ldb, info )
           else
              ! use blocked code
              if( itype==1_${ik}$ ) then
                 if( upper ) then
                    ! compute inv(u**t)*a*inv(u)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib${ii}$_${ri}$sygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_${ri}$trsm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT',kb, n-k-kb+1, &
                                    one, b( k, k ), ldb,a( k, k+kb ), lda )
                          call stdlib${ii}$_${ri}$symm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( &
                                    k, k+kb ), ldb, one,a( k, k+kb ), lda )
                          call stdlib${ii}$_${ri}$syr2k( uplo, 'TRANSPOSE', n-k-kb+1, kb, -one,a( k, k+kb ), &
                                    lda, b( k, k+kb ), ldb,one, a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_${ri}$symm( 'LEFT', uplo, kb, n-k-kb+1, -half,a( k, k ), lda, b( &
                                    k, k+kb ), ldb, one,a( k, k+kb ), lda )
                          call stdlib${ii}$_${ri}$trsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+&
                                    1_${ik}$, one,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda )
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**t)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib${ii}$_${ri}$sygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_${ri}$trsm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',n-k-kb+1, kb, &
                                    one, b( k, k ), ldb,a( k+kb, k ), lda )
                          call stdlib${ii}$_${ri}$symm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(&
                                     k+kb, k ), ldb, one,a( k+kb, k ), lda )
                          call stdlib${ii}$_${ri}$syr2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-one, a( k+kb, k &
                                    ), lda, b( k+kb, k ),ldb, one, a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_${ri}$symm( 'RIGHT', uplo, n-k-kb+1, kb, -half,a( k, k ), lda, b(&
                                     k+kb, k ), ldb, one,a( k+kb, k ), lda )
                          call stdlib${ii}$_${ri}$trsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, &
                                    kb, one,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda )
                       end if
                    end do
                 end if
              else
                 if( upper ) then
                    ! compute u*a*u**t
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_${ri}$trmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, one, &
                                 b, ldb, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_${ri}$symm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1_${ik}$, k ), &
                                 ldb, one, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_${ri}$syr2k( uplo, 'NO TRANSPOSE', k-1, kb, one,a( 1_${ik}$, k ), lda, b( &
                                 1_${ik}$, k ), ldb, one, a,lda )
                       call stdlib${ii}$_${ri}$symm( 'RIGHT', uplo, k-1, kb, half, a( k, k ),lda, b( 1_${ik}$, k ), &
                                 ldb, one, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_${ri}$trmm( 'RIGHT', uplo, 'TRANSPOSE', 'NON-UNIT',k-1, kb, one, b( &
                                 k, k ), ldb, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_${ri}$sygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 else
                    ! compute l**t*a*l
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_${ri}$trmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, one, &
                                 b, ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_${ri}$symm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, one, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_${ri}$syr2k( uplo, 'TRANSPOSE', k-1, kb, one,a( k, 1_${ik}$ ), lda, b( k, &
                                 1_${ik}$ ), ldb, one, a,lda )
                       call stdlib${ii}$_${ri}$symm( 'LEFT', uplo, kb, k-1, half, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, one, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_${ri}$trmm( 'LEFT', uplo, 'TRANSPOSE', 'NON-UNIT', kb,k-1, one, b( &
                                 k, k ), ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_${ri}$sygs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 end if
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ri}$sygst

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_ssygs2( itype, uplo, n, a, lda, b, ldb, info )
     !! SSYGS2 reduces a real symmetric-definite generalized eigenproblem
     !! to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L.
     !! B must have been previously factorized as U**T *U or L*L**T by SPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: b(ldb,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k
           real(sp) :: akk, bkk, ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SSYGS2', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**t)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = a( k, k )
                    bkk = b( k, k )
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_sscal( n-k, one / bkk, a( k, k+1 ), lda )
                       ct = -half*akk
                       call stdlib${ii}$_saxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_ssyr2( uplo, n-k, -one, a( k, k+1 ), lda,b( k, k+1 ), ldb, a( &
                                 k+1, k+1 ), lda )
                       call stdlib${ii}$_saxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_strsv( uplo, 'TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), ldb, &
                                 a( k, k+1 ), lda )
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = a( k, k )
                    bkk = b( k, k )
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_sscal( n-k, one / bkk, a( k+1, k ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_saxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_ssyr2( uplo, n-k, -one, a( k+1, k ), 1_${ik}$,b( k+1, k ), 1_${ik}$, a( k+1, &
                                 k+1 ), lda )
                       call stdlib${ii}$_saxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_strsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), &
                                 ldb, a( k+1, k ), 1_${ik}$ )
                    end if
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**t
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = a( k, k )
                    bkk = b( k, k )
                    call stdlib${ii}$_strmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, b,ldb, a( 1_${ik}$, k ), 1_${ik}$ &
                              )
                    ct = half*akk
                    call stdlib${ii}$_saxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_ssyr2( uplo, k-1, one, a( 1_${ik}$, k ), 1_${ik}$, b( 1_${ik}$, k ), 1_${ik}$,a, lda )
                    call stdlib${ii}$_saxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_sscal( k-1, bkk, a( 1_${ik}$, k ), 1_${ik}$ )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**t *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = a( k, k )
                    bkk = b( k, k )
                    call stdlib${ii}$_strmv( uplo, 'TRANSPOSE', 'NON-UNIT', k-1, b, ldb,a( k, 1_${ik}$ ), lda )
                              
                    ct = half*akk
                    call stdlib${ii}$_saxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_ssyr2( uplo, k-1, one, a( k, 1_${ik}$ ), lda, b( k, 1_${ik}$ ),ldb, a, lda )
                              
                    call stdlib${ii}$_saxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_sscal( k-1, bkk, a( k, 1_${ik}$ ), lda )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_ssygs2

     pure module subroutine stdlib${ii}$_dsygs2( itype, uplo, n, a, lda, b, ldb, info )
     !! DSYGS2 reduces a real symmetric-definite generalized eigenproblem
     !! to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L.
     !! B must have been previously factorized as U**T *U or L*L**T by DPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: b(ldb,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k
           real(dp) :: akk, bkk, ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSYGS2', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**t)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = a( k, k )
                    bkk = b( k, k )
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_dscal( n-k, one / bkk, a( k, k+1 ), lda )
                       ct = -half*akk
                       call stdlib${ii}$_daxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_dsyr2( uplo, n-k, -one, a( k, k+1 ), lda,b( k, k+1 ), ldb, a( &
                                 k+1, k+1 ), lda )
                       call stdlib${ii}$_daxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_dtrsv( uplo, 'TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), ldb, &
                                 a( k, k+1 ), lda )
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = a( k, k )
                    bkk = b( k, k )
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_dscal( n-k, one / bkk, a( k+1, k ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_daxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_dsyr2( uplo, n-k, -one, a( k+1, k ), 1_${ik}$,b( k+1, k ), 1_${ik}$, a( k+1, &
                                 k+1 ), lda )
                       call stdlib${ii}$_daxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_dtrsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), &
                                 ldb, a( k+1, k ), 1_${ik}$ )
                    end if
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**t
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = a( k, k )
                    bkk = b( k, k )
                    call stdlib${ii}$_dtrmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, b,ldb, a( 1_${ik}$, k ), 1_${ik}$ &
                              )
                    ct = half*akk
                    call stdlib${ii}$_daxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_dsyr2( uplo, k-1, one, a( 1_${ik}$, k ), 1_${ik}$, b( 1_${ik}$, k ), 1_${ik}$,a, lda )
                    call stdlib${ii}$_daxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_dscal( k-1, bkk, a( 1_${ik}$, k ), 1_${ik}$ )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**t *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = a( k, k )
                    bkk = b( k, k )
                    call stdlib${ii}$_dtrmv( uplo, 'TRANSPOSE', 'NON-UNIT', k-1, b, ldb,a( k, 1_${ik}$ ), lda )
                              
                    ct = half*akk
                    call stdlib${ii}$_daxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_dsyr2( uplo, k-1, one, a( k, 1_${ik}$ ), lda, b( k, 1_${ik}$ ),ldb, a, lda )
                              
                    call stdlib${ii}$_daxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_dscal( k-1, bkk, a( k, 1_${ik}$ ), lda )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_dsygs2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$sygs2( itype, uplo, n, a, lda, b, ldb, info )
     !! DSYGS2: reduces a real symmetric-definite generalized eigenproblem
     !! to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L.
     !! B must have been previously factorized as U**T *U or L*L**T by DPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: b(ldb,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k
           real(${rk}$) :: akk, bkk, ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSYGS2', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**t)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = a( k, k )
                    bkk = b( k, k )
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_${ri}$scal( n-k, one / bkk, a( k, k+1 ), lda )
                       ct = -half*akk
                       call stdlib${ii}$_${ri}$axpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_${ri}$syr2( uplo, n-k, -one, a( k, k+1 ), lda,b( k, k+1 ), ldb, a( &
                                 k+1, k+1 ), lda )
                       call stdlib${ii}$_${ri}$axpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_${ri}$trsv( uplo, 'TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), ldb, &
                                 a( k, k+1 ), lda )
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = a( k, k )
                    bkk = b( k, k )
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_${ri}$scal( n-k, one / bkk, a( k+1, k ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_${ri}$axpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$syr2( uplo, n-k, -one, a( k+1, k ), 1_${ik}$,b( k+1, k ), 1_${ik}$, a( k+1, &
                                 k+1 ), lda )
                       call stdlib${ii}$_${ri}$axpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$trsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), &
                                 ldb, a( k+1, k ), 1_${ik}$ )
                    end if
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**t
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = a( k, k )
                    bkk = b( k, k )
                    call stdlib${ii}$_${ri}$trmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, b,ldb, a( 1_${ik}$, k ), 1_${ik}$ &
                              )
                    ct = half*akk
                    call stdlib${ii}$_${ri}$axpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$syr2( uplo, k-1, one, a( 1_${ik}$, k ), 1_${ik}$, b( 1_${ik}$, k ), 1_${ik}$,a, lda )
                    call stdlib${ii}$_${ri}$axpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$scal( k-1, bkk, a( 1_${ik}$, k ), 1_${ik}$ )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**t *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = a( k, k )
                    bkk = b( k, k )
                    call stdlib${ii}$_${ri}$trmv( uplo, 'TRANSPOSE', 'NON-UNIT', k-1, b, ldb,a( k, 1_${ik}$ ), lda )
                              
                    ct = half*akk
                    call stdlib${ii}$_${ri}$axpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_${ri}$syr2( uplo, k-1, one, a( k, 1_${ik}$ ), lda, b( k, 1_${ik}$ ),ldb, a, lda )
                              
                    call stdlib${ii}$_${ri}$axpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_${ri}$scal( k-1, bkk, a( k, 1_${ik}$ ), lda )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ri}$sygs2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sspgst( itype, uplo, n, ap, bp, info )
     !! SSPGST reduces a real symmetric-definite generalized eigenproblem
     !! to standard form, using packed storage.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     !! B must have been previously factorized as U**T*U or L*L**T by SPPTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, n
           ! Array Arguments 
           real(sp), intent(inout) :: ap(*)
           real(sp), intent(in) :: bp(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(sp) :: ajj, akk, bjj, bkk, ct
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SSPGST', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**t)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0_${ik}$
                 do j = 1, n
                    j1 = jj + 1_${ik}$
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    bjj = bp( jj )
                    call stdlib${ii}$_stpsv( uplo, 'TRANSPOSE', 'NONUNIT', j, bp,ap( j1 ), 1_${ik}$ )
                    call stdlib${ii}$_sspmv( uplo, j-1, -one, ap, bp( j1 ), 1_${ik}$, one,ap( j1 ), 1_${ik}$ )
                    call stdlib${ii}$_sscal( j-1, one / bjj, ap( j1 ), 1_${ik}$ )
                    ap( jj ) = ( ap( jj )-stdlib${ii}$_sdot( j-1, ap( j1 ), 1_${ik}$, bp( j1 ),1_${ik}$ ) ) / &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1_${ik}$
                 do k = 1, n
                    k1k1 = kk + n - k + 1_${ik}$
                    ! update the lower triangle of a(k:n,k:n)
                    akk = ap( kk )
                    bkk = bp( kk )
                    akk = akk / bkk**2_${ik}$
                    ap( kk ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_sscal( n-k, one / bkk, ap( kk+1 ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_saxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_sspr2( uplo, n-k, -one, ap( kk+1 ), 1_${ik}$,bp( kk+1 ), 1_${ik}$, ap( k1k1 )&
                                  )
                       call stdlib${ii}$_saxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_stpsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,bp( k1k1 ), ap( &
                                 kk+1 ), 1_${ik}$ )
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**t
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0_${ik}$
                 do k = 1, n
                    k1 = kk + 1_${ik}$
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = ap( kk )
                    bkk = bp( kk )
                    call stdlib${ii}$_stpmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, bp,ap( k1 ), 1_${ik}$ )
                              
                    ct = half*akk
                    call stdlib${ii}$_saxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_sspr2( uplo, k-1, one, ap( k1 ), 1_${ik}$, bp( k1 ), 1_${ik}$,ap )
                    call stdlib${ii}$_saxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_sscal( k-1, bkk, ap( k1 ), 1_${ik}$ )
                    ap( kk ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**t *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1_${ik}$
                 do j = 1, n
                    j1j1 = jj + n - j + 1_${ik}$
                    ! compute the j-th column of the lower triangle of a
                    ajj = ap( jj )
                    bjj = bp( jj )
                    ap( jj ) = ajj*bjj + stdlib${ii}$_sdot( n-j, ap( jj+1 ), 1_${ik}$,bp( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_sscal( n-j, bjj, ap( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_sspmv( uplo, n-j, one, ap( j1j1 ), bp( jj+1 ), 1_${ik}$,one, ap( jj+1 ), &
                              1_${ik}$ )
                    call stdlib${ii}$_stpmv( uplo, 'TRANSPOSE', 'NON-UNIT', n-j+1,bp( jj ), ap( jj ), 1_${ik}$ &
                              )
                    jj = j1j1
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_sspgst

     pure module subroutine stdlib${ii}$_dspgst( itype, uplo, n, ap, bp, info )
     !! DSPGST reduces a real symmetric-definite generalized eigenproblem
     !! to standard form, using packed storage.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     !! B must have been previously factorized as U**T*U or L*L**T by DPPTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, n
           ! Array Arguments 
           real(dp), intent(inout) :: ap(*)
           real(dp), intent(in) :: bp(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(dp) :: ajj, akk, bjj, bkk, ct
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSPGST', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**t)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0_${ik}$
                 do j = 1, n
                    j1 = jj + 1_${ik}$
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    bjj = bp( jj )
                    call stdlib${ii}$_dtpsv( uplo, 'TRANSPOSE', 'NONUNIT', j, bp,ap( j1 ), 1_${ik}$ )
                    call stdlib${ii}$_dspmv( uplo, j-1, -one, ap, bp( j1 ), 1_${ik}$, one,ap( j1 ), 1_${ik}$ )
                    call stdlib${ii}$_dscal( j-1, one / bjj, ap( j1 ), 1_${ik}$ )
                    ap( jj ) = ( ap( jj )-stdlib${ii}$_ddot( j-1, ap( j1 ), 1_${ik}$, bp( j1 ),1_${ik}$ ) ) / &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1_${ik}$
                 do k = 1, n
                    k1k1 = kk + n - k + 1_${ik}$
                    ! update the lower triangle of a(k:n,k:n)
                    akk = ap( kk )
                    bkk = bp( kk )
                    akk = akk / bkk**2_${ik}$
                    ap( kk ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_dscal( n-k, one / bkk, ap( kk+1 ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_daxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_dspr2( uplo, n-k, -one, ap( kk+1 ), 1_${ik}$,bp( kk+1 ), 1_${ik}$, ap( k1k1 )&
                                  )
                       call stdlib${ii}$_daxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_dtpsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,bp( k1k1 ), ap( &
                                 kk+1 ), 1_${ik}$ )
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**t
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0_${ik}$
                 do k = 1, n
                    k1 = kk + 1_${ik}$
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = ap( kk )
                    bkk = bp( kk )
                    call stdlib${ii}$_dtpmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, bp,ap( k1 ), 1_${ik}$ )
                              
                    ct = half*akk
                    call stdlib${ii}$_daxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_dspr2( uplo, k-1, one, ap( k1 ), 1_${ik}$, bp( k1 ), 1_${ik}$,ap )
                    call stdlib${ii}$_daxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_dscal( k-1, bkk, ap( k1 ), 1_${ik}$ )
                    ap( kk ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**t *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1_${ik}$
                 do j = 1, n
                    j1j1 = jj + n - j + 1_${ik}$
                    ! compute the j-th column of the lower triangle of a
                    ajj = ap( jj )
                    bjj = bp( jj )
                    ap( jj ) = ajj*bjj + stdlib${ii}$_ddot( n-j, ap( jj+1 ), 1_${ik}$,bp( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_dscal( n-j, bjj, ap( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_dspmv( uplo, n-j, one, ap( j1j1 ), bp( jj+1 ), 1_${ik}$,one, ap( jj+1 ), &
                              1_${ik}$ )
                    call stdlib${ii}$_dtpmv( uplo, 'TRANSPOSE', 'NON-UNIT', n-j+1,bp( jj ), ap( jj ), 1_${ik}$ &
                              )
                    jj = j1j1
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_dspgst

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$spgst( itype, uplo, n, ap, bp, info )
     !! DSPGST: reduces a real symmetric-definite generalized eigenproblem
     !! to standard form, using packed storage.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     !! B must have been previously factorized as U**T*U or L*L**T by DPPTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: ap(*)
           real(${rk}$), intent(in) :: bp(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(${rk}$) :: ajj, akk, bjj, bkk, ct
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSPGST', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**t)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0_${ik}$
                 do j = 1, n
                    j1 = jj + 1_${ik}$
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    bjj = bp( jj )
                    call stdlib${ii}$_${ri}$tpsv( uplo, 'TRANSPOSE', 'NONUNIT', j, bp,ap( j1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$spmv( uplo, j-1, -one, ap, bp( j1 ), 1_${ik}$, one,ap( j1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$scal( j-1, one / bjj, ap( j1 ), 1_${ik}$ )
                    ap( jj ) = ( ap( jj )-stdlib${ii}$_${ri}$dot( j-1, ap( j1 ), 1_${ik}$, bp( j1 ),1_${ik}$ ) ) / &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1_${ik}$
                 do k = 1, n
                    k1k1 = kk + n - k + 1_${ik}$
                    ! update the lower triangle of a(k:n,k:n)
                    akk = ap( kk )
                    bkk = bp( kk )
                    akk = akk / bkk**2_${ik}$
                    ap( kk ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_${ri}$scal( n-k, one / bkk, ap( kk+1 ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_${ri}$axpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$spr2( uplo, n-k, -one, ap( kk+1 ), 1_${ik}$,bp( kk+1 ), 1_${ik}$, ap( k1k1 )&
                                  )
                       call stdlib${ii}$_${ri}$axpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$tpsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,bp( k1k1 ), ap( &
                                 kk+1 ), 1_${ik}$ )
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**t
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0_${ik}$
                 do k = 1, n
                    k1 = kk + 1_${ik}$
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = ap( kk )
                    bkk = bp( kk )
                    call stdlib${ii}$_${ri}$tpmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, bp,ap( k1 ), 1_${ik}$ )
                              
                    ct = half*akk
                    call stdlib${ii}$_${ri}$axpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$spr2( uplo, k-1, one, ap( k1 ), 1_${ik}$, bp( k1 ), 1_${ik}$,ap )
                    call stdlib${ii}$_${ri}$axpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$scal( k-1, bkk, ap( k1 ), 1_${ik}$ )
                    ap( kk ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**t *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1_${ik}$
                 do j = 1, n
                    j1j1 = jj + n - j + 1_${ik}$
                    ! compute the j-th column of the lower triangle of a
                    ajj = ap( jj )
                    bjj = bp( jj )
                    ap( jj ) = ajj*bjj + stdlib${ii}$_${ri}$dot( n-j, ap( jj+1 ), 1_${ik}$,bp( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$scal( n-j, bjj, ap( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$spmv( uplo, n-j, one, ap( j1j1 ), bp( jj+1 ), 1_${ik}$,one, ap( jj+1 ), &
                              1_${ik}$ )
                    call stdlib${ii}$_${ri}$tpmv( uplo, 'TRANSPOSE', 'NON-UNIT', n-j+1,bp( jj ), ap( jj ), 1_${ik}$ &
                              )
                    jj = j1j1
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ri}$spgst

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_ssbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, info )
     !! SSBGST reduces a real symmetric-definite banded generalized
     !! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     !! such that C has the same bandwidth as A.
     !! B must have been previously factorized as S**T*S by SPBSTF, using a
     !! split Cholesky factorization. A is overwritten by C = X**T*A*X, where
     !! X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the
     !! bandwidth of A.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ka, kb, ldab, ldbb, ldx, n
           ! Array Arguments 
           real(sp), intent(inout) :: ab(ldab,*)
           real(sp), intent(in) :: bb(ldbb,*)
           real(sp), intent(out) :: work(*), x(ldx,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: update, upper, wantx
           integer(${ik}$) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(sp) :: bii, ra, ra1, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantx = stdlib_lsame( vect, 'V' )
           upper = stdlib_lsame( uplo, 'U' )
           ka1 = ka + 1_${ik}$
           kb1 = kb + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ka<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kb<0_${ik}$ .or. kb>ka ) then
              info = -5_${ik}$
           else if( ldab<ka+1 ) then
              info = -7_${ik}$
           else if( ldbb<kb+1 ) then
              info = -9_${ik}$
           else if( ldx<1_${ik}$ .or. wantx .and. ldx<max( 1_${ik}$, n ) ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SSBGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if( wantx )call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, x, ldx )
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib${ii}$_spbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = ( n+kb ) / 2_${ik}$
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**t*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**t*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the cosines and sines of the rotations are stored in the array
           ! work. the cosines of the 1st set of rotations are stored in
           ! elements n+2:n+m-kb-1 and the sines of the 1st set in elements
           ! 2:m-kb-1; the cosines of the 2nd set are stored in elements
           ! n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1_${ik}$
           10 continue
           if( update ) then
              i = i - 1_${ik}$
              kbt = min( kb, i-1 )
              i0 = i - 1_${ik}$
              i1 = min( n, i+ka )
              i2 = i - kbt + ka1
              if( i<m+1 ) then
                 update = .false.
                 i = i + 1_${ik}$
                 i0 = m
                 if( ka==0 )go to 480
                 go to 10
              end if
           else
              i = i + ka
              if( i>n-1 )go to 480
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( kb1, i )
                 do j = i, i1
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do j = max( 1, i-ka ), i
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*ab( k-i+ka1, i ) -bb(&
                        k-i+kb1, i )*ab( j-i+ka1, i ) +ab( ka1, i )*bb( j-i+kb1, i )*bb( k-i+kb1, &
                                  i )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( k-i+kb1, i )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_sscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_sger( n-m, kbt, -one, x( m+1, i ), 1_${ik}$,bb( kb1-kbt, i ), &
                              1_${ik}$, x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+ka1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib${ii}$_slartg( ab( k+1, i-k+ka ), ra1,work( n+i-k+ka-m ), work( i-k+&
                                 ka-m ),ra )
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb( kb1-k, i )*ra1
                       work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ab( 1_${ik}$, i-k+ka ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = work( n+j-m )*ab( 1_${ik}$, j+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_slargv( nrt, ab( 1_${ik}$, j2t ), inca, work( j2t-m ), ka1,work( &
                           n+j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(&
                                  n+j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              work( n+j2-m ),work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca,work( n+j2-m ), work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_srot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j-m ), &
                                 work( j-m ) )
                    end do
                 end if
              end do loop_130
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kb1-kbt, i )*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), &
                              inca, work( n+j2-ka ),work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    work( n+j ) = work( n+j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = work( n+j )*ab( 1_${ik}$, j+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_slargv( nr, ab( 1_${ik}$, j2 ), inca, work( j2 ), ka1,work( n+j2 ), ka1 )
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(&
                                  n+j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              work( n+j2 ),work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, work( n+j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_srot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca,work( n+j2-m ), work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work( n+j-m ) = work( n+j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( 1_${ik}$, i )
                 do j = i, i1
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do j = max( 1, i-ka ), i
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*ab( i-k+1, k ) -bb( i-k+1, &
                                 k )*ab( i-j+1, j ) +ab( 1_${ik}$, i )*bb( i-j+1, j )*bb( i-k+1, k )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-k+1, k )*ab( i-j+1, j )
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_sscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_sger( n-m, kbt, -one, x( m+1, i ), 1_${ik}$,bb( kbt+1, i-kbt )&
                              , ldbb-1,x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib${ii}$_slartg( ab( ka1-k, i ), ra1, work( n+i-k+ka-m ),work( i-k+ka-m &
                                 ), ra )
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb( k+1, i-k )*ra1
                       work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( ka1, i-k )
                       ab( ka1, i-k ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( ka1, i-k )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = work( n+j-m )*ab( ka1, j-ka+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_slargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, &
                           work( n+j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( &
                                 n+j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, work( n+&
                              j2-m ), work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_srot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j-m ), &
                                 work( j-m ) )
                    end do
                 end if
              end do loop_360
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-&
                              ka+1 ), inca,work( n+j2-ka ), work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    work( n+j ) = work( n+j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = work( n+j )*ab( ka1, j-ka+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_slargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,work( n+j2 ), &
                              ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( &
                                 n+j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, work( n+&
                              j2 ), work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_srot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work( n+j-m ) = work( n+j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           end if
           go to 10
           480 continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0_${ik}$
           490 continue
           if( update ) then
              i = i + 1_${ik}$
              kbt = min( kb, m-i )
              i0 = i + 1_${ik}$
              i1 = max( 1_${ik}$, i-ka )
              i2 = i + kbt - ka1
              if( i>m ) then
                 update = .false.
                 i = i - 1_${ik}$
                 i0 = m + 1_${ik}$
                 if( ka==0 )return
                 go to 490
              end if
           else
              i = i - ka
              if( i<2 )return
           end if
           if( i<m-kbt ) then
              nx = m
           else
              nx = n
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( kb1, i )
                 do j = i1, i
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do j = i, min( n, i+ka )
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-j+kb1, j )*ab( i-k+ka1, k ) -bb(&
                        i-k+kb1, k )*ab( i-j+ka1, j ) +ab( ka1, i )*bb( i-j+kb1, j )*bb( i-k+kb1, &
                                  k )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-k+kb1, k )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( i-k+kb1, k )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_sscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_sger( nx, kbt, -one, x( 1_${ik}$, i ), 1_${ik}$, bb( kb, i+1 ),ldbb-&
                              1_${ik}$, x( 1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+ka1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib${ii}$_slartg( ab( k+1, i ), ra1, work( n+i+k-ka ),work( i+k-ka ), ra &
                                 )
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( kb1-k, i+k )*ra1
                       work( m-kb+i+k ) = work( n+i+k-ka )*t -work( i+k-ka )*ab( 1_${ik}$, i+k )
                       ab( 1_${ik}$, i+k ) = work( i+k-ka )*t +work( n+i+k-ka )*ab( 1_${ik}$, i+k )
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = work( n+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_slargv( nrt, ab( 1_${ik}$, j1+ka ), inca, work( j1 ), ka1,work( &
                           n+j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 work( n+j1 ),work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              work( n+j1 ),work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               work( n+j1t ),work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_srot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_610
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),&
                               inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    work( n+m-kb+j ) = work( n+m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = work( n+m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_slargv( nr, ab( 1_${ik}$, j1+ka ), inca, work( m-kb+j1 ),ka1, work( n+m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca,&
                                 work( n+m-kb+j1 ), work( m-kb+j1 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              work( n+m-kb+j1 ),work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                              work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_srot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               work( n+j1t ),work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, min( i+kb, m ) - 2*ka - 1
                    work( n+j ) = work( n+j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( 1_${ik}$, i )
                 do j = i1, i
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do j = i, min( n, i+ka )
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*ab( k-i+1, i ) -bb( k-i+1, &
                                 i )*ab( j-i+1, i ) +ab( 1_${ik}$, i )*bb( j-i+1, i )*bb( k-i+1, i )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( k-i+1, i )*ab( j-i+1, i )
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_sscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_sger( nx, kbt, -one, x( 1_${ik}$, i ), 1_${ik}$, bb( 2_${ik}$, i ), 1_${ik}$,x( 1_${ik}$, &
                              i+1 ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib${ii}$_slartg( ab( ka1-k, i+k-ka ), ra1,work( n+i+k-ka ), work( i+k-&
                                 ka ), ra )
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( k+1, i )*ra1
                       work( m-kb+i+k ) = work( n+i+k-ka )*t -work( i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ab( ka1, i+k-ka ) = work( i+k-ka )*t +work( n+i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = work( n+j )*ab( ka1, j-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_slargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,work( n+&
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+&
                                 j1 ), work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, work( &
                              n+j1 ),work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_srot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_840
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, &
                              j1t+l-1 ), inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    work( n+m-kb+j ) = work( n+m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = work( n+m-kb+j )*ab( ka1, j-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_slargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, work( n+m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_slartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+&
                                 m-kb+j1 ), work( m-kb+j1 ),ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_slar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, work( &
                              n+m-kb+j1 ),work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_srot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, min( i+kb, m ) - 2*ka - 1
                    work( n+j ) = work( n+j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           end if
           go to 490
     end subroutine stdlib${ii}$_ssbgst

     pure module subroutine stdlib${ii}$_dsbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, info )
     !! DSBGST reduces a real symmetric-definite banded generalized
     !! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     !! such that C has the same bandwidth as A.
     !! B must have been previously factorized as S**T*S by DPBSTF, using a
     !! split Cholesky factorization. A is overwritten by C = X**T*A*X, where
     !! X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the
     !! bandwidth of A.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ka, kb, ldab, ldbb, ldx, n
           ! Array Arguments 
           real(dp), intent(inout) :: ab(ldab,*)
           real(dp), intent(in) :: bb(ldbb,*)
           real(dp), intent(out) :: work(*), x(ldx,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: update, upper, wantx
           integer(${ik}$) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(dp) :: bii, ra, ra1, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantx = stdlib_lsame( vect, 'V' )
           upper = stdlib_lsame( uplo, 'U' )
           ka1 = ka + 1_${ik}$
           kb1 = kb + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ka<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kb<0_${ik}$ .or. kb>ka ) then
              info = -5_${ik}$
           else if( ldab<ka+1 ) then
              info = -7_${ik}$
           else if( ldbb<kb+1 ) then
              info = -9_${ik}$
           else if( ldx<1_${ik}$ .or. wantx .and. ldx<max( 1_${ik}$, n ) ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSBGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if( wantx )call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, x, ldx )
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib${ii}$_dpbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = ( n+kb ) / 2_${ik}$
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**t*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**t*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the cosines and sines of the rotations are stored in the array
           ! work. the cosines of the 1st set of rotations are stored in
           ! elements n+2:n+m-kb-1 and the sines of the 1st set in elements
           ! 2:m-kb-1; the cosines of the 2nd set are stored in elements
           ! n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1_${ik}$
           10 continue
           if( update ) then
              i = i - 1_${ik}$
              kbt = min( kb, i-1 )
              i0 = i - 1_${ik}$
              i1 = min( n, i+ka )
              i2 = i - kbt + ka1
              if( i<m+1 ) then
                 update = .false.
                 i = i + 1_${ik}$
                 i0 = m
                 if( ka==0 )go to 480
                 go to 10
              end if
           else
              i = i + ka
              if( i>n-1 )go to 480
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( kb1, i )
                 do j = i, i1
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do j = max( 1, i-ka ), i
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*ab( k-i+ka1, i ) -bb(&
                        k-i+kb1, i )*ab( j-i+ka1, i ) +ab( ka1, i )*bb( j-i+kb1, i )*bb( k-i+kb1, &
                                  i )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( k-i+kb1, i )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_dscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_dger( n-m, kbt, -one, x( m+1, i ), 1_${ik}$,bb( kb1-kbt, i ), &
                              1_${ik}$, x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+ka1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib${ii}$_dlartg( ab( k+1, i-k+ka ), ra1,work( n+i-k+ka-m ), work( i-k+&
                                 ka-m ),ra )
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb( kb1-k, i )*ra1
                       work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ab( 1_${ik}$, i-k+ka ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = work( n+j-m )*ab( 1_${ik}$, j+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_dlargv( nrt, ab( 1_${ik}$, j2t ), inca, work( j2t-m ), ka1,work( &
                           n+j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(&
                                  n+j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              work( n+j2-m ),work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca,work( n+j2-m ), work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_drot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j-m ), &
                                 work( j-m ) )
                    end do
                 end if
              end do loop_130
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kb1-kbt, i )*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), &
                              inca, work( n+j2-ka ),work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    work( n+j ) = work( n+j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = work( n+j )*ab( 1_${ik}$, j+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_dlargv( nr, ab( 1_${ik}$, j2 ), inca, work( j2 ), ka1,work( n+j2 ), ka1 )
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(&
                                  n+j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              work( n+j2 ),work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, work( n+j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_drot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca,work( n+j2-m ), work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work( n+j-m ) = work( n+j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( 1_${ik}$, i )
                 do j = i, i1
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do j = max( 1, i-ka ), i
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*ab( i-k+1, k ) -bb( i-k+1, &
                                 k )*ab( i-j+1, j ) +ab( 1_${ik}$, i )*bb( i-j+1, j )*bb( i-k+1, k )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-k+1, k )*ab( i-j+1, j )
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_dscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_dger( n-m, kbt, -one, x( m+1, i ), 1_${ik}$,bb( kbt+1, i-kbt )&
                              , ldbb-1,x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib${ii}$_dlartg( ab( ka1-k, i ), ra1, work( n+i-k+ka-m ),work( i-k+ka-m &
                                 ), ra )
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb( k+1, i-k )*ra1
                       work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( ka1, i-k )
                       ab( ka1, i-k ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( ka1, i-k )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = work( n+j-m )*ab( ka1, j-ka+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_dlargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, &
                           work( n+j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( &
                                 n+j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, work( n+&
                              j2-m ), work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_drot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j-m ), &
                                 work( j-m ) )
                    end do
                 end if
              end do loop_360
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-&
                              ka+1 ), inca,work( n+j2-ka ), work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    work( n+j ) = work( n+j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = work( n+j )*ab( ka1, j-ka+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_dlargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,work( n+j2 ), &
                              ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( &
                                 n+j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, work( n+&
                              j2 ), work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_drot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work( n+j-m ) = work( n+j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           end if
           go to 10
           480 continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0_${ik}$
           490 continue
           if( update ) then
              i = i + 1_${ik}$
              kbt = min( kb, m-i )
              i0 = i + 1_${ik}$
              i1 = max( 1_${ik}$, i-ka )
              i2 = i + kbt - ka1
              if( i>m ) then
                 update = .false.
                 i = i - 1_${ik}$
                 i0 = m + 1_${ik}$
                 if( ka==0 )return
                 go to 490
              end if
           else
              i = i - ka
              if( i<2 )return
           end if
           if( i<m-kbt ) then
              nx = m
           else
              nx = n
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( kb1, i )
                 do j = i1, i
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do j = i, min( n, i+ka )
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-j+kb1, j )*ab( i-k+ka1, k ) -bb(&
                        i-k+kb1, k )*ab( i-j+ka1, j ) +ab( ka1, i )*bb( i-j+kb1, j )*bb( i-k+kb1, &
                                  k )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-k+kb1, k )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( i-k+kb1, k )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_dscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_dger( nx, kbt, -one, x( 1_${ik}$, i ), 1_${ik}$, bb( kb, i+1 ),ldbb-&
                              1_${ik}$, x( 1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+ka1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib${ii}$_dlartg( ab( k+1, i ), ra1, work( n+i+k-ka ),work( i+k-ka ), ra &
                                 )
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( kb1-k, i+k )*ra1
                       work( m-kb+i+k ) = work( n+i+k-ka )*t -work( i+k-ka )*ab( 1_${ik}$, i+k )
                       ab( 1_${ik}$, i+k ) = work( i+k-ka )*t +work( n+i+k-ka )*ab( 1_${ik}$, i+k )
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = work( n+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_dlargv( nrt, ab( 1_${ik}$, j1+ka ), inca, work( j1 ), ka1,work( &
                           n+j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 work( n+j1 ),work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              work( n+j1 ),work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               work( n+j1t ),work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_drot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_610
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),&
                               inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    work( n+m-kb+j ) = work( n+m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = work( n+m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_dlargv( nr, ab( 1_${ik}$, j1+ka ), inca, work( m-kb+j1 ),ka1, work( n+m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca,&
                                 work( n+m-kb+j1 ), work( m-kb+j1 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              work( n+m-kb+j1 ),work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                              work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_drot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               work( n+j1t ),work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, min( i+kb, m ) - 2*ka - 1
                    work( n+j ) = work( n+j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( 1_${ik}$, i )
                 do j = i1, i
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do j = i, min( n, i+ka )
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*ab( k-i+1, i ) -bb( k-i+1, &
                                 i )*ab( j-i+1, i ) +ab( 1_${ik}$, i )*bb( j-i+1, i )*bb( k-i+1, i )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( k-i+1, i )*ab( j-i+1, i )
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_dscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_dger( nx, kbt, -one, x( 1_${ik}$, i ), 1_${ik}$, bb( 2_${ik}$, i ), 1_${ik}$,x( 1_${ik}$, &
                              i+1 ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib${ii}$_dlartg( ab( ka1-k, i+k-ka ), ra1,work( n+i+k-ka ), work( i+k-&
                                 ka ), ra )
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( k+1, i )*ra1
                       work( m-kb+i+k ) = work( n+i+k-ka )*t -work( i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ab( ka1, i+k-ka ) = work( i+k-ka )*t +work( n+i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = work( n+j )*ab( ka1, j-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_dlargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,work( n+&
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+&
                                 j1 ), work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, work( &
                              n+j1 ),work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_drot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_840
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, &
                              j1t+l-1 ), inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    work( n+m-kb+j ) = work( n+m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = work( n+m-kb+j )*ab( ka1, j-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_dlargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, work( n+m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_dlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+&
                                 m-kb+j1 ), work( m-kb+j1 ),ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_dlar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, work( &
                              n+m-kb+j1 ),work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_drot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, min( i+kb, m ) - 2*ka - 1
                    work( n+j ) = work( n+j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           end if
           go to 490
     end subroutine stdlib${ii}$_dsbgst

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$sbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, info )
     !! DSBGST: reduces a real symmetric-definite banded generalized
     !! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     !! such that C has the same bandwidth as A.
     !! B must have been previously factorized as S**T*S by DPBSTF, using a
     !! split Cholesky factorization. A is overwritten by C = X**T*A*X, where
     !! X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the
     !! bandwidth of A.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ka, kb, ldab, ldbb, ldx, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: ab(ldab,*)
           real(${rk}$), intent(in) :: bb(ldbb,*)
           real(${rk}$), intent(out) :: work(*), x(ldx,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: update, upper, wantx
           integer(${ik}$) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(${rk}$) :: bii, ra, ra1, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantx = stdlib_lsame( vect, 'V' )
           upper = stdlib_lsame( uplo, 'U' )
           ka1 = ka + 1_${ik}$
           kb1 = kb + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ka<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kb<0_${ik}$ .or. kb>ka ) then
              info = -5_${ik}$
           else if( ldab<ka+1 ) then
              info = -7_${ik}$
           else if( ldbb<kb+1 ) then
              info = -9_${ik}$
           else if( ldx<1_${ik}$ .or. wantx .and. ldx<max( 1_${ik}$, n ) ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DSBGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if( wantx )call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, x, ldx )
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib${ii}$_${ri}$pbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = ( n+kb ) / 2_${ik}$
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**t*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**t*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the cosines and sines of the rotations are stored in the array
           ! work. the cosines of the 1st set of rotations are stored in
           ! elements n+2:n+m-kb-1 and the sines of the 1st set in elements
           ! 2:m-kb-1; the cosines of the 2nd set are stored in elements
           ! n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1_${ik}$
           10 continue
           if( update ) then
              i = i - 1_${ik}$
              kbt = min( kb, i-1 )
              i0 = i - 1_${ik}$
              i1 = min( n, i+ka )
              i2 = i - kbt + ka1
              if( i<m+1 ) then
                 update = .false.
                 i = i + 1_${ik}$
                 i0 = m
                 if( ka==0 )go to 480
                 go to 10
              end if
           else
              i = i + ka
              if( i>n-1 )go to 480
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( kb1, i )
                 do j = i, i1
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do j = max( 1, i-ka ), i
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*ab( k-i+ka1, i ) -bb(&
                        k-i+kb1, i )*ab( j-i+ka1, i ) +ab( ka1, i )*bb( j-i+kb1, i )*bb( k-i+kb1, &
                                  i )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( k-i+kb1, i )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ri}$scal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ri}$ger( n-m, kbt, -one, x( m+1, i ), 1_${ik}$,bb( kb1-kbt, i ), &
                              1_${ik}$, x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+ka1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib${ii}$_${ri}$lartg( ab( k+1, i-k+ka ), ra1,work( n+i-k+ka-m ), work( i-k+&
                                 ka-m ),ra )
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb( kb1-k, i )*ra1
                       work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ab( 1_${ik}$, i-k+ka ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = work( n+j-m )*ab( 1_${ik}$, j+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nrt, ab( 1_${ik}$, j2t ), inca, work( j2t-m ), ka1,work( &
                           n+j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(&
                                  n+j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              work( n+j2-m ),work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca,work( n+j2-m ), work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ri}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j-m ), &
                                 work( j-m ) )
                    end do
                 end if
              end do loop_130
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kb1-kbt, i )*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), &
                              inca, work( n+j2-ka ),work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    work( n+j ) = work( n+j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = work( n+j )*ab( 1_${ik}$, j+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ri}$largv( nr, ab( 1_${ik}$, j2 ), inca, work( j2 ), ka1,work( n+j2 ), ka1 )
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, work(&
                                  n+j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              work( n+j2 ),work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, work( n+j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ri}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca,work( n+j2-m ), work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work( n+j-m ) = work( n+j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( 1_${ik}$, i )
                 do j = i, i1
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do j = max( 1, i-ka ), i
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*ab( i-k+1, k ) -bb( i-k+1, &
                                 k )*ab( i-j+1, j ) +ab( 1_${ik}$, i )*bb( i-j+1, j )*bb( i-k+1, k )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-k+1, k )*ab( i-j+1, j )
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ri}$scal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ri}$ger( n-m, kbt, -one, x( m+1, i ), 1_${ik}$,bb( kbt+1, i-kbt )&
                              , ldbb-1,x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib${ii}$_${ri}$lartg( ab( ka1-k, i ), ra1, work( n+i-k+ka-m ),work( i-k+ka-m &
                                 ), ra )
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb( k+1, i-k )*ra1
                       work( i-k ) = work( n+i-k+ka-m )*t -work( i-k+ka-m )*ab( ka1, i-k )
                       ab( ka1, i-k ) = work( i-k+ka-m )*t +work( n+i-k+ka-m )*ab( ka1, i-k )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = work( n+j-m )*ab( ka1, j-ka+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, &
                           work( n+j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( &
                                 n+j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, work( n+&
                              j2-m ), work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ri}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j-m ), &
                                 work( j-m ) )
                    end do
                 end if
              end do loop_360
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-&
                              ka+1 ), inca,work( n+j2-ka ), work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    work( n+j ) = work( n+j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = work( n+j )*ab( ka1, j-ka+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ri}$largv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,work( n+j2 ), &
                              ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, work( &
                                 n+j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, work( n+&
                              j2 ), work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ri}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,work( n+j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, work( n+j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work( n+j-m ) = work( n+j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           end if
           go to 10
           480 continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0_${ik}$
           490 continue
           if( update ) then
              i = i + 1_${ik}$
              kbt = min( kb, m-i )
              i0 = i + 1_${ik}$
              i1 = max( 1_${ik}$, i-ka )
              i2 = i + kbt - ka1
              if( i>m ) then
                 update = .false.
                 i = i - 1_${ik}$
                 i0 = m + 1_${ik}$
                 if( ka==0 )return
                 go to 490
              end if
           else
              i = i - ka
              if( i<2 )return
           end if
           if( i<m-kbt ) then
              nx = m
           else
              nx = n
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( kb1, i )
                 do j = i1, i
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do j = i, min( n, i+ka )
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-j+kb1, j )*ab( i-k+ka1, k ) -bb(&
                        i-k+kb1, k )*ab( i-j+ka1, j ) +ab( ka1, i )*bb( i-j+kb1, j )*bb( i-k+kb1, &
                                  k )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-k+kb1, k )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( i-k+kb1, k )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ri}$scal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ri}$ger( nx, kbt, -one, x( 1_${ik}$, i ), 1_${ik}$, bb( kb, i+1 ),ldbb-&
                              1_${ik}$, x( 1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+ka1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib${ii}$_${ri}$lartg( ab( k+1, i ), ra1, work( n+i+k-ka ),work( i+k-ka ), ra &
                                 )
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( kb1-k, i+k )*ra1
                       work( m-kb+i+k ) = work( n+i+k-ka )*t -work( i+k-ka )*ab( 1_${ik}$, i+k )
                       ab( 1_${ik}$, i+k ) = work( i+k-ka )*t +work( n+i+k-ka )*ab( 1_${ik}$, i+k )
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = work( n+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nrt, ab( 1_${ik}$, j1+ka ), inca, work( j1 ), ka1,work( &
                           n+j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 work( n+j1 ),work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              work( n+j1 ),work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               work( n+j1t ),work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ri}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_610
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),&
                               inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    work( n+m-kb+j ) = work( n+m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = work( n+m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ri}$largv( nr, ab( 1_${ik}$, j1+ka ), inca, work( m-kb+j1 ),ka1, work( n+m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca,&
                                 work( n+m-kb+j1 ), work( m-kb+j1 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              work( n+m-kb+j1 ),work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                              work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ri}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               work( n+j1t ),work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, min( i+kb, m ) - 2*ka - 1
                    work( n+j ) = work( n+j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb( 1_${ik}$, i )
                 do j = i1, i
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do j = i, min( n, i+ka )
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*ab( k-i+1, i ) -bb( k-i+1, &
                                 i )*ab( j-i+1, i ) +ab( 1_${ik}$, i )*bb( j-i+1, i )*bb( k-i+1, i )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( k-i+1, i )*ab( j-i+1, i )
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ri}$scal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ri}$ger( nx, kbt, -one, x( 1_${ik}$, i ), 1_${ik}$, bb( 2_${ik}$, i ), 1_${ik}$,x( 1_${ik}$, &
                              i+1 ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib${ii}$_${ri}$lartg( ab( ka1-k, i+k-ka ), ra1,work( n+i+k-ka ), work( i+k-&
                                 ka ), ra )
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( k+1, i )*ra1
                       work( m-kb+i+k ) = work( n+i+k-ka )*t -work( i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ab( ka1, i+k-ka ) = work( i+k-ka )*t +work( n+i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = work( n+j )*ab( ka1, j-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,work( n+&
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+&
                                 j1 ), work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, work( &
                              n+j1 ),work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ri}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_840
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, &
                              j1t+l-1 ), inca,work( n+m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    work( n+m-kb+j ) = work( n+m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = work( n+m-kb+j )*ab( ka1, j-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ri}$largv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, work( n+m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ri}$lartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, work( n+&
                                 m-kb+j1 ), work( m-kb+j1 ),ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ri}$lar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, work( &
                              n+m-kb+j1 ),work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ri}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,work( n+m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,work( n+j1t ), work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, min( i+kb, m ) - 2*ka - 1
                    work( n+j ) = work( n+j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           end if
           go to 490
     end subroutine stdlib${ii}$_${ri}$sbgst

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_chegst( itype, uplo, n, a, lda, b, ldb, info )
     !! CHEGST reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     !! B must have been previously factorized as U**H*U or L*L**H by CPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k, kb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CHEGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CHEGST', uplo, n, -1_${ik}$, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=n ) then
              ! use unblocked code
              call stdlib${ii}$_chegs2( itype, uplo, n, a, lda, b, ldb, info )
           else
              ! use blocked code
              if( itype==1_${ik}$ ) then
                 if( upper ) then
                    ! compute inv(u**h)*a*inv(u)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib${ii}$_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_ctrsm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, &
                                    n-k-kb+1, cone,b( k, k ), ldb, a( k, k+kb ), lda )
                          call stdlib${ii}$_chemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(&
                                     k, k+kb ), ldb,cone, a( k, k+kb ), lda )
                          call stdlib${ii}$_cher2k( uplo, 'CONJUGATE TRANSPOSE', n-k-kb+1,kb, -cone, a( &
                                    k, k+kb ), lda,b( k, k+kb ), ldb, one,a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_chemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(&
                                     k, k+kb ), ldb,cone, a( k, k+kb ), lda )
                          call stdlib${ii}$_ctrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+&
                                    1_${ik}$, cone,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda )
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**h)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib${ii}$_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_ctrsm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', n-k-&
                                    kb+1, kb, cone,b( k, k ), ldb, a( k+kb, k ), lda )
                          call stdlib${ii}$_chemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, &
                                    b( k+kb, k ), ldb,cone, a( k+kb, k ), lda )
                          call stdlib${ii}$_cher2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-cone, a( k+kb, &
                                    k ), lda,b( k+kb, k ), ldb, one,a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_chemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, &
                                    b( k+kb, k ), ldb,cone, a( k+kb, k ), lda )
                          call stdlib${ii}$_ctrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, &
                                    kb, cone,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda )
                       end if
                    end do
                 end if
              else
                 if( upper ) then
                    ! compute u*a*u**h
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_ctrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, cone, &
                                 b, ldb, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_chemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1_${ik}$, k ),&
                                  ldb, cone, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_cher2k( uplo, 'NO TRANSPOSE', k-1, kb, cone,a( 1_${ik}$, k ), lda, b( &
                                 1_${ik}$, k ), ldb, one, a,lda )
                       call stdlib${ii}$_chemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1_${ik}$, k ),&
                                  ldb, cone, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_ctrmm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', k-1, &
                                 kb, cone, b( k, k ), ldb,a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 else
                    ! compute l**h*a*l
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_ctrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, cone,&
                                  b, ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_chemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, cone, a( k, 1_${ik}$ ),lda )
                       call stdlib${ii}$_cher2k( uplo, 'CONJUGATE TRANSPOSE', k-1, kb,cone, a( k, 1_${ik}$ ), &
                                 lda, b( k, 1_${ik}$ ), ldb,one, a, lda )
                       call stdlib${ii}$_chemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, cone, a( k, 1_${ik}$ ),lda )
                       call stdlib${ii}$_ctrmm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, k-1,&
                                  cone, b( k, k ), ldb,a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_chegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 end if
              end if
           end if
           return
     end subroutine stdlib${ii}$_chegst

     pure module subroutine stdlib${ii}$_zhegst( itype, uplo, n, a, lda, b, ldb, info )
     !! ZHEGST reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     !! B must have been previously factorized as U**H*U or L*L**H by ZPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k, kb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHEGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZHEGST', uplo, n, -1_${ik}$, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=n ) then
              ! use unblocked code
              call stdlib${ii}$_zhegs2( itype, uplo, n, a, lda, b, ldb, info )
           else
              ! use blocked code
              if( itype==1_${ik}$ ) then
                 if( upper ) then
                    ! compute inv(u**h)*a*inv(u)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib${ii}$_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_ztrsm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, &
                                    n-k-kb+1, cone,b( k, k ), ldb, a( k, k+kb ), lda )
                          call stdlib${ii}$_zhemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(&
                                     k, k+kb ), ldb,cone, a( k, k+kb ), lda )
                          call stdlib${ii}$_zher2k( uplo, 'CONJUGATE TRANSPOSE', n-k-kb+1,kb, -cone, a( &
                                    k, k+kb ), lda,b( k, k+kb ), ldb, one,a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_zhemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(&
                                     k, k+kb ), ldb,cone, a( k, k+kb ), lda )
                          call stdlib${ii}$_ztrsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+&
                                    1_${ik}$, cone,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda )
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**h)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib${ii}$_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_ztrsm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', n-k-&
                                    kb+1, kb, cone,b( k, k ), ldb, a( k+kb, k ), lda )
                          call stdlib${ii}$_zhemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, &
                                    b( k+kb, k ), ldb,cone, a( k+kb, k ), lda )
                          call stdlib${ii}$_zher2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-cone, a( k+kb, &
                                    k ), lda,b( k+kb, k ), ldb, one,a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_zhemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, &
                                    b( k+kb, k ), ldb,cone, a( k+kb, k ), lda )
                          call stdlib${ii}$_ztrsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, &
                                    kb, cone,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda )
                       end if
                    end do
                 end if
              else
                 if( upper ) then
                    ! compute u*a*u**h
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_ztrmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, cone, &
                                 b, ldb, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_zhemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1_${ik}$, k ),&
                                  ldb, cone, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_zher2k( uplo, 'NO TRANSPOSE', k-1, kb, cone,a( 1_${ik}$, k ), lda, b( &
                                 1_${ik}$, k ), ldb, one, a,lda )
                       call stdlib${ii}$_zhemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1_${ik}$, k ),&
                                  ldb, cone, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_ztrmm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', k-1, &
                                 kb, cone, b( k, k ), ldb,a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 else
                    ! compute l**h*a*l
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_ztrmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, cone,&
                                  b, ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_zhemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, cone, a( k, 1_${ik}$ ),lda )
                       call stdlib${ii}$_zher2k( uplo, 'CONJUGATE TRANSPOSE', k-1, kb,cone, a( k, 1_${ik}$ ), &
                                 lda, b( k, 1_${ik}$ ), ldb,one, a, lda )
                       call stdlib${ii}$_zhemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, cone, a( k, 1_${ik}$ ),lda )
                       call stdlib${ii}$_ztrmm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, k-1,&
                                  cone, b( k, k ), ldb,a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_zhegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 end if
              end if
           end if
           return
     end subroutine stdlib${ii}$_zhegst

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$hegst( itype, uplo, n, a, lda, b, ldb, info )
     !! ZHEGST: reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     !! B must have been previously factorized as U**H*U or L*L**H by ZPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k, kb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHEGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZHEGST', uplo, n, -1_${ik}$, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=n ) then
              ! use unblocked code
              call stdlib${ii}$_${ci}$hegs2( itype, uplo, n, a, lda, b, ldb, info )
           else
              ! use blocked code
              if( itype==1_${ik}$ ) then
                 if( upper ) then
                    ! compute inv(u**h)*a*inv(u)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib${ii}$_${ci}$hegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_${ci}$trsm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, &
                                    n-k-kb+1, cone,b( k, k ), ldb, a( k, k+kb ), lda )
                          call stdlib${ii}$_${ci}$hemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(&
                                     k, k+kb ), ldb,cone, a( k, k+kb ), lda )
                          call stdlib${ii}$_${ci}$her2k( uplo, 'CONJUGATE TRANSPOSE', n-k-kb+1,kb, -cone, a( &
                                    k, k+kb ), lda,b( k, k+kb ), ldb, one,a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_${ci}$hemm( 'LEFT', uplo, kb, n-k-kb+1, -chalf,a( k, k ), lda, b(&
                                     k, k+kb ), ldb,cone, a( k, k+kb ), lda )
                          call stdlib${ii}$_${ci}$trsm( 'RIGHT', uplo, 'NO TRANSPOSE','NON-UNIT', kb, n-k-kb+&
                                    1_${ik}$, cone,b( k+kb, k+kb ), ldb, a( k, k+kb ),lda )
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**h)
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib${ii}$_${ci}$hegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                       if( k+kb<=n ) then
                          call stdlib${ii}$_${ci}$trsm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', n-k-&
                                    kb+1, kb, cone,b( k, k ), ldb, a( k+kb, k ), lda )
                          call stdlib${ii}$_${ci}$hemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, &
                                    b( k+kb, k ), ldb,cone, a( k+kb, k ), lda )
                          call stdlib${ii}$_${ci}$her2k( uplo, 'NO TRANSPOSE', n-k-kb+1, kb,-cone, a( k+kb, &
                                    k ), lda,b( k+kb, k ), ldb, one,a( k+kb, k+kb ), lda )
                          call stdlib${ii}$_${ci}$hemm( 'RIGHT', uplo, n-k-kb+1, kb, -chalf,a( k, k ), lda, &
                                    b( k+kb, k ), ldb,cone, a( k+kb, k ), lda )
                          call stdlib${ii}$_${ci}$trsm( 'LEFT', uplo, 'NO TRANSPOSE','NON-UNIT', n-k-kb+1, &
                                    kb, cone,b( k+kb, k+kb ), ldb, a( k+kb, k ),lda )
                       end if
                    end do
                 end if
              else
                 if( upper ) then
                    ! compute u*a*u**h
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_${ci}$trmm( 'LEFT', uplo, 'NO TRANSPOSE', 'NON-UNIT',k-1, kb, cone, &
                                 b, ldb, a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_${ci}$hemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1_${ik}$, k ),&
                                  ldb, cone, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_${ci}$her2k( uplo, 'NO TRANSPOSE', k-1, kb, cone,a( 1_${ik}$, k ), lda, b( &
                                 1_${ik}$, k ), ldb, one, a,lda )
                       call stdlib${ii}$_${ci}$hemm( 'RIGHT', uplo, k-1, kb, chalf, a( k, k ),lda, b( 1_${ik}$, k ),&
                                  ldb, cone, a( 1_${ik}$, k ),lda )
                       call stdlib${ii}$_${ci}$trmm( 'RIGHT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', k-1, &
                                 kb, cone, b( k, k ), ldb,a( 1_${ik}$, k ), lda )
                       call stdlib${ii}$_${ci}$hegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 else
                    ! compute l**h*a*l
                    do k = 1, n, nb
                       kb = min( n-k+1, nb )
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib${ii}$_${ci}$trmm( 'RIGHT', uplo, 'NO TRANSPOSE', 'NON-UNIT',kb, k-1, cone,&
                                  b, ldb, a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_${ci}$hemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, cone, a( k, 1_${ik}$ ),lda )
                       call stdlib${ii}$_${ci}$her2k( uplo, 'CONJUGATE TRANSPOSE', k-1, kb,cone, a( k, 1_${ik}$ ), &
                                 lda, b( k, 1_${ik}$ ), ldb,one, a, lda )
                       call stdlib${ii}$_${ci}$hemm( 'LEFT', uplo, kb, k-1, chalf, a( k, k ),lda, b( k, 1_${ik}$ ), &
                                 ldb, cone, a( k, 1_${ik}$ ),lda )
                       call stdlib${ii}$_${ci}$trmm( 'LEFT', uplo, 'CONJUGATE TRANSPOSE','NON-UNIT', kb, k-1,&
                                  cone, b( k, k ), ldb,a( k, 1_${ik}$ ), lda )
                       call stdlib${ii}$_${ci}$hegs2( itype, uplo, kb, a( k, k ), lda,b( k, k ), ldb, info )
                                 
                    end do
                 end if
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ci}$hegst

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_chegs2( itype, uplo, n, a, lda, b, ldb, info )
     !! CHEGS2 reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L.
     !! B must have been previously factorized as U**H *U or L*L**H by ZPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k
           real(sp) :: akk, bkk
           complex(sp) :: ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CHEGS2', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**h)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = real( a( k, k ),KIND=sp)
                    bkk = real( b( k, k ),KIND=sp)
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_csscal( n-k, one / bkk, a( k, k+1 ), lda )
                       ct = -half*akk
                       call stdlib${ii}$_clacgv( n-k, a( k, k+1 ), lda )
                       call stdlib${ii}$_clacgv( n-k, b( k, k+1 ), ldb )
                       call stdlib${ii}$_caxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_cher2( uplo, n-k, -cone, a( k, k+1 ), lda,b( k, k+1 ), ldb, a( &
                                 k+1, k+1 ), lda )
                       call stdlib${ii}$_caxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_clacgv( n-k, b( k, k+1 ), ldb )
                       call stdlib${ii}$_ctrsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT',n-k, b( k+1, k+&
                                 1_${ik}$ ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_clacgv( n-k, a( k, k+1 ), lda )
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real( a( k, k ),KIND=sp)
                    bkk = real( b( k, k ),KIND=sp)
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_csscal( n-k, one / bkk, a( k+1, k ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_caxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_cher2( uplo, n-k, -cone, a( k+1, k ), 1_${ik}$,b( k+1, k ), 1_${ik}$, a( k+1,&
                                  k+1 ), lda )
                       call stdlib${ii}$_caxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_ctrsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), &
                                 ldb, a( k+1, k ), 1_${ik}$ )
                    end if
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**h
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real( a( k, k ),KIND=sp)
                    bkk = real( b( k, k ),KIND=sp)
                    call stdlib${ii}$_ctrmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, b,ldb, a( 1_${ik}$, k ), 1_${ik}$ &
                              )
                    ct = half*akk
                    call stdlib${ii}$_caxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_cher2( uplo, k-1, cone, a( 1_${ik}$, k ), 1_${ik}$, b( 1_${ik}$, k ), 1_${ik}$,a, lda )
                              
                    call stdlib${ii}$_caxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_csscal( k-1, bkk, a( 1_${ik}$, k ), 1_${ik}$ )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**h *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = real( a( k, k ),KIND=sp)
                    bkk = real( b( k, k ),KIND=sp)
                    call stdlib${ii}$_clacgv( k-1, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_ctrmv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', k-1,b, ldb, a( k, &
                              1_${ik}$ ), lda )
                    ct = half*akk
                    call stdlib${ii}$_clacgv( k-1, b( k, 1_${ik}$ ), ldb )
                    call stdlib${ii}$_caxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_cher2( uplo, k-1, cone, a( k, 1_${ik}$ ), lda, b( k, 1_${ik}$ ),ldb, a, lda )
                              
                    call stdlib${ii}$_caxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_clacgv( k-1, b( k, 1_${ik}$ ), ldb )
                    call stdlib${ii}$_csscal( k-1, bkk, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_clacgv( k-1, a( k, 1_${ik}$ ), lda )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_chegs2

     pure module subroutine stdlib${ii}$_zhegs2( itype, uplo, n, a, lda, b, ldb, info )
     !! ZHEGS2 reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L.
     !! B must have been previously factorized as U**H *U or L*L**H by ZPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k
           real(dp) :: akk, bkk
           complex(dp) :: ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHEGS2', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**h)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = real( a( k, k ),KIND=dp)
                    bkk = real( b( k, k ),KIND=dp)
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_zdscal( n-k, one / bkk, a( k, k+1 ), lda )
                       ct = -half*akk
                       call stdlib${ii}$_zlacgv( n-k, a( k, k+1 ), lda )
                       call stdlib${ii}$_zlacgv( n-k, b( k, k+1 ), ldb )
                       call stdlib${ii}$_zaxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_zher2( uplo, n-k, -cone, a( k, k+1 ), lda,b( k, k+1 ), ldb, a( &
                                 k+1, k+1 ), lda )
                       call stdlib${ii}$_zaxpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_zlacgv( n-k, b( k, k+1 ), ldb )
                       call stdlib${ii}$_ztrsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT',n-k, b( k+1, k+&
                                 1_${ik}$ ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_zlacgv( n-k, a( k, k+1 ), lda )
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real( a( k, k ),KIND=dp)
                    bkk = real( b( k, k ),KIND=dp)
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_zdscal( n-k, one / bkk, a( k+1, k ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_zaxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_zher2( uplo, n-k, -cone, a( k+1, k ), 1_${ik}$,b( k+1, k ), 1_${ik}$, a( k+1,&
                                  k+1 ), lda )
                       call stdlib${ii}$_zaxpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_ztrsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), &
                                 ldb, a( k+1, k ), 1_${ik}$ )
                    end if
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**h
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real( a( k, k ),KIND=dp)
                    bkk = real( b( k, k ),KIND=dp)
                    call stdlib${ii}$_ztrmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, b,ldb, a( 1_${ik}$, k ), 1_${ik}$ &
                              )
                    ct = half*akk
                    call stdlib${ii}$_zaxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_zher2( uplo, k-1, cone, a( 1_${ik}$, k ), 1_${ik}$, b( 1_${ik}$, k ), 1_${ik}$,a, lda )
                              
                    call stdlib${ii}$_zaxpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_zdscal( k-1, bkk, a( 1_${ik}$, k ), 1_${ik}$ )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**h *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = real( a( k, k ),KIND=dp)
                    bkk = real( b( k, k ),KIND=dp)
                    call stdlib${ii}$_zlacgv( k-1, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_ztrmv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', k-1,b, ldb, a( k, &
                              1_${ik}$ ), lda )
                    ct = half*akk
                    call stdlib${ii}$_zlacgv( k-1, b( k, 1_${ik}$ ), ldb )
                    call stdlib${ii}$_zaxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_zher2( uplo, k-1, cone, a( k, 1_${ik}$ ), lda, b( k, 1_${ik}$ ),ldb, a, lda )
                              
                    call stdlib${ii}$_zaxpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_zlacgv( k-1, b( k, 1_${ik}$ ), ldb )
                    call stdlib${ii}$_zdscal( k-1, bkk, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_zlacgv( k-1, a( k, 1_${ik}$ ), lda )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_zhegs2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$hegs2( itype, uplo, n, a, lda, b, ldb, info )
     !! ZHEGS2: reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L.
     !! B must have been previously factorized as U**H *U or L*L**H by ZPOTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, lda, ldb, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: k
           real(${ck}$) :: akk, bkk
           complex(${ck}$) :: ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHEGS2', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**h)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = real( a( k, k ),KIND=${ck}$)
                    bkk = real( b( k, k ),KIND=${ck}$)
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_${ci}$dscal( n-k, one / bkk, a( k, k+1 ), lda )
                       ct = -half*akk
                       call stdlib${ii}$_${ci}$lacgv( n-k, a( k, k+1 ), lda )
                       call stdlib${ii}$_${ci}$lacgv( n-k, b( k, k+1 ), ldb )
                       call stdlib${ii}$_${ci}$axpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_${ci}$her2( uplo, n-k, -cone, a( k, k+1 ), lda,b( k, k+1 ), ldb, a( &
                                 k+1, k+1 ), lda )
                       call stdlib${ii}$_${ci}$axpy( n-k, ct, b( k, k+1 ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_${ci}$lacgv( n-k, b( k, k+1 ), ldb )
                       call stdlib${ii}$_${ci}$trsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT',n-k, b( k+1, k+&
                                 1_${ik}$ ), ldb, a( k, k+1 ),lda )
                       call stdlib${ii}$_${ci}$lacgv( n-k, a( k, k+1 ), lda )
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real( a( k, k ),KIND=${ck}$)
                    bkk = real( b( k, k ),KIND=${ck}$)
                    akk = akk / bkk**2_${ik}$
                    a( k, k ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_${ci}$dscal( n-k, one / bkk, a( k+1, k ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_${ci}$axpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$her2( uplo, n-k, -cone, a( k+1, k ), 1_${ik}$,b( k+1, k ), 1_${ik}$, a( k+1,&
                                  k+1 ), lda )
                       call stdlib${ii}$_${ci}$axpy( n-k, ct, b( k+1, k ), 1_${ik}$, a( k+1, k ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$trsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,b( k+1, k+1 ), &
                                 ldb, a( k+1, k ), 1_${ik}$ )
                    end if
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**h
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real( a( k, k ),KIND=${ck}$)
                    bkk = real( b( k, k ),KIND=${ck}$)
                    call stdlib${ii}$_${ci}$trmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, b,ldb, a( 1_${ik}$, k ), 1_${ik}$ &
                              )
                    ct = half*akk
                    call stdlib${ii}$_${ci}$axpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$her2( uplo, k-1, cone, a( 1_${ik}$, k ), 1_${ik}$, b( 1_${ik}$, k ), 1_${ik}$,a, lda )
                              
                    call stdlib${ii}$_${ci}$axpy( k-1, ct, b( 1_${ik}$, k ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$dscal( k-1, bkk, a( 1_${ik}$, k ), 1_${ik}$ )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**h *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = real( a( k, k ),KIND=${ck}$)
                    bkk = real( b( k, k ),KIND=${ck}$)
                    call stdlib${ii}$_${ci}$lacgv( k-1, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_${ci}$trmv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', k-1,b, ldb, a( k, &
                              1_${ik}$ ), lda )
                    ct = half*akk
                    call stdlib${ii}$_${ci}$lacgv( k-1, b( k, 1_${ik}$ ), ldb )
                    call stdlib${ii}$_${ci}$axpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_${ci}$her2( uplo, k-1, cone, a( k, 1_${ik}$ ), lda, b( k, 1_${ik}$ ),ldb, a, lda )
                              
                    call stdlib${ii}$_${ci}$axpy( k-1, ct, b( k, 1_${ik}$ ), ldb, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_${ci}$lacgv( k-1, b( k, 1_${ik}$ ), ldb )
                    call stdlib${ii}$_${ci}$dscal( k-1, bkk, a( k, 1_${ik}$ ), lda )
                    call stdlib${ii}$_${ci}$lacgv( k-1, a( k, 1_${ik}$ ), lda )
                    a( k, k ) = akk*bkk**2_${ik}$
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ci}$hegs2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_chpgst( itype, uplo, n, ap, bp, info )
     !! CHPGST reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form, using packed storage.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     !! B must have been previously factorized as U**H*U or L*L**H by CPPTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, n
           ! Array Arguments 
           complex(sp), intent(inout) :: ap(*)
           complex(sp), intent(in) :: bp(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(sp) :: ajj, akk, bjj, bkk
           complex(sp) :: ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CHPGST', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**h)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0_${ik}$
                 do j = 1, n
                    j1 = jj + 1_${ik}$
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    ap( jj ) = real( ap( jj ),KIND=sp)
                    bjj = real( bp( jj ),KIND=sp)
                    call stdlib${ii}$_ctpsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', j,bp, ap( j1 ), 1_${ik}$ &
                              )
                    call stdlib${ii}$_chpmv( uplo, j-1, -cone, ap, bp( j1 ), 1_${ik}$, cone,ap( j1 ), 1_${ik}$ )
                              
                    call stdlib${ii}$_csscal( j-1, one / bjj, ap( j1 ), 1_${ik}$ )
                    ap( jj ) = ( ap( jj )-stdlib${ii}$_cdotc( j-1, ap( j1 ), 1_${ik}$, bp( j1 ),1_${ik}$ ) ) / &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1_${ik}$
                 do k = 1, n
                    k1k1 = kk + n - k + 1_${ik}$
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real( ap( kk ),KIND=sp)
                    bkk = real( bp( kk ),KIND=sp)
                    akk = akk / bkk**2_${ik}$
                    ap( kk ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_csscal( n-k, one / bkk, ap( kk+1 ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_caxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_chpr2( uplo, n-k, -cone, ap( kk+1 ), 1_${ik}$,bp( kk+1 ), 1_${ik}$, ap( k1k1 &
                                 ) )
                       call stdlib${ii}$_caxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_ctpsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,bp( k1k1 ), ap( &
                                 kk+1 ), 1_${ik}$ )
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**h
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0_${ik}$
                 do k = 1, n
                    k1 = kk + 1_${ik}$
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real( ap( kk ),KIND=sp)
                    bkk = real( bp( kk ),KIND=sp)
                    call stdlib${ii}$_ctpmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, bp,ap( k1 ), 1_${ik}$ )
                              
                    ct = half*akk
                    call stdlib${ii}$_caxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_chpr2( uplo, k-1, cone, ap( k1 ), 1_${ik}$, bp( k1 ), 1_${ik}$,ap )
                    call stdlib${ii}$_caxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_csscal( k-1, bkk, ap( k1 ), 1_${ik}$ )
                    ap( kk ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**h *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1_${ik}$
                 do j = 1, n
                    j1j1 = jj + n - j + 1_${ik}$
                    ! compute the j-th column of the lower triangle of a
                    ajj = real( ap( jj ),KIND=sp)
                    bjj = real( bp( jj ),KIND=sp)
                    ap( jj ) = ajj*bjj + stdlib${ii}$_cdotc( n-j, ap( jj+1 ), 1_${ik}$,bp( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_csscal( n-j, bjj, ap( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_chpmv( uplo, n-j, cone, ap( j1j1 ), bp( jj+1 ), 1_${ik}$,cone, ap( jj+1 )&
                              , 1_${ik}$ )
                    call stdlib${ii}$_ctpmv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT',n-j+1, bp( jj ), &
                              ap( jj ), 1_${ik}$ )
                    jj = j1j1
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_chpgst

     pure module subroutine stdlib${ii}$_zhpgst( itype, uplo, n, ap, bp, info )
     !! ZHPGST reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form, using packed storage.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     !! B must have been previously factorized as U**H*U or L*L**H by ZPPTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, n
           ! Array Arguments 
           complex(dp), intent(inout) :: ap(*)
           complex(dp), intent(in) :: bp(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(dp) :: ajj, akk, bjj, bkk
           complex(dp) :: ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHPGST', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**h)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0_${ik}$
                 do j = 1, n
                    j1 = jj + 1_${ik}$
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    ap( jj ) = real( ap( jj ),KIND=dp)
                    bjj = real( bp( jj ),KIND=dp)
                    call stdlib${ii}$_ztpsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', j,bp, ap( j1 ), 1_${ik}$ &
                              )
                    call stdlib${ii}$_zhpmv( uplo, j-1, -cone, ap, bp( j1 ), 1_${ik}$, cone,ap( j1 ), 1_${ik}$ )
                              
                    call stdlib${ii}$_zdscal( j-1, one / bjj, ap( j1 ), 1_${ik}$ )
                    ap( jj ) = ( ap( jj )-stdlib${ii}$_zdotc( j-1, ap( j1 ), 1_${ik}$, bp( j1 ),1_${ik}$ ) ) / &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1_${ik}$
                 do k = 1, n
                    k1k1 = kk + n - k + 1_${ik}$
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real( ap( kk ),KIND=dp)
                    bkk = real( bp( kk ),KIND=dp)
                    akk = akk / bkk**2_${ik}$
                    ap( kk ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_zdscal( n-k, one / bkk, ap( kk+1 ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_zaxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_zhpr2( uplo, n-k, -cone, ap( kk+1 ), 1_${ik}$,bp( kk+1 ), 1_${ik}$, ap( k1k1 &
                                 ) )
                       call stdlib${ii}$_zaxpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_ztpsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,bp( k1k1 ), ap( &
                                 kk+1 ), 1_${ik}$ )
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**h
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0_${ik}$
                 do k = 1, n
                    k1 = kk + 1_${ik}$
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real( ap( kk ),KIND=dp)
                    bkk = real( bp( kk ),KIND=dp)
                    call stdlib${ii}$_ztpmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, bp,ap( k1 ), 1_${ik}$ )
                              
                    ct = half*akk
                    call stdlib${ii}$_zaxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_zhpr2( uplo, k-1, cone, ap( k1 ), 1_${ik}$, bp( k1 ), 1_${ik}$,ap )
                    call stdlib${ii}$_zaxpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_zdscal( k-1, bkk, ap( k1 ), 1_${ik}$ )
                    ap( kk ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**h *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1_${ik}$
                 do j = 1, n
                    j1j1 = jj + n - j + 1_${ik}$
                    ! compute the j-th column of the lower triangle of a
                    ajj = real( ap( jj ),KIND=dp)
                    bjj = real( bp( jj ),KIND=dp)
                    ap( jj ) = ajj*bjj + stdlib${ii}$_zdotc( n-j, ap( jj+1 ), 1_${ik}$,bp( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_zdscal( n-j, bjj, ap( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_zhpmv( uplo, n-j, cone, ap( j1j1 ), bp( jj+1 ), 1_${ik}$,cone, ap( jj+1 )&
                              , 1_${ik}$ )
                    call stdlib${ii}$_ztpmv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT',n-j+1, bp( jj ), &
                              ap( jj ), 1_${ik}$ )
                    jj = j1j1
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_zhpgst

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$hpgst( itype, uplo, n, ap, bp, info )
     !! ZHPGST: reduces a complex Hermitian-definite generalized
     !! eigenproblem to standard form, using packed storage.
     !! If ITYPE = 1, the problem is A*x = lambda*B*x,
     !! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     !! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     !! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     !! B must have been previously factorized as U**H*U or L*L**H by ZPPTRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: itype, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: ap(*)
           complex(${ck}$), intent(in) :: bp(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(${ck}$) :: ajj, akk, bjj, bkk
           complex(${ck}$) :: ct
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( itype<1_${ik}$ .or. itype>3_${ik}$ ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHPGST', -info )
              return
           end if
           if( itype==1_${ik}$ ) then
              if( upper ) then
                 ! compute inv(u**h)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0_${ik}$
                 do j = 1, n
                    j1 = jj + 1_${ik}$
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    ap( jj ) = real( ap( jj ),KIND=${ck}$)
                    bjj = real( bp( jj ),KIND=${ck}$)
                    call stdlib${ii}$_${ci}$tpsv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT', j,bp, ap( j1 ), 1_${ik}$ &
                              )
                    call stdlib${ii}$_${ci}$hpmv( uplo, j-1, -cone, ap, bp( j1 ), 1_${ik}$, cone,ap( j1 ), 1_${ik}$ )
                              
                    call stdlib${ii}$_${ci}$dscal( j-1, one / bjj, ap( j1 ), 1_${ik}$ )
                    ap( jj ) = ( ap( jj )-stdlib${ii}$_${ci}$dotc( j-1, ap( j1 ), 1_${ik}$, bp( j1 ),1_${ik}$ ) ) / &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1_${ik}$
                 do k = 1, n
                    k1k1 = kk + n - k + 1_${ik}$
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real( ap( kk ),KIND=${ck}$)
                    bkk = real( bp( kk ),KIND=${ck}$)
                    akk = akk / bkk**2_${ik}$
                    ap( kk ) = akk
                    if( k<n ) then
                       call stdlib${ii}$_${ci}$dscal( n-k, one / bkk, ap( kk+1 ), 1_${ik}$ )
                       ct = -half*akk
                       call stdlib${ii}$_${ci}$axpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$hpr2( uplo, n-k, -cone, ap( kk+1 ), 1_${ik}$,bp( kk+1 ), 1_${ik}$, ap( k1k1 &
                                 ) )
                       call stdlib${ii}$_${ci}$axpy( n-k, ct, bp( kk+1 ), 1_${ik}$, ap( kk+1 ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$tpsv( uplo, 'NO TRANSPOSE', 'NON-UNIT', n-k,bp( k1k1 ), ap( &
                                 kk+1 ), 1_${ik}$ )
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if( upper ) then
                 ! compute u*a*u**h
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0_${ik}$
                 do k = 1, n
                    k1 = kk + 1_${ik}$
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real( ap( kk ),KIND=${ck}$)
                    bkk = real( bp( kk ),KIND=${ck}$)
                    call stdlib${ii}$_${ci}$tpmv( uplo, 'NO TRANSPOSE', 'NON-UNIT', k-1, bp,ap( k1 ), 1_${ik}$ )
                              
                    ct = half*akk
                    call stdlib${ii}$_${ci}$axpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$hpr2( uplo, k-1, cone, ap( k1 ), 1_${ik}$, bp( k1 ), 1_${ik}$,ap )
                    call stdlib${ii}$_${ci}$axpy( k-1, ct, bp( k1 ), 1_${ik}$, ap( k1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$dscal( k-1, bkk, ap( k1 ), 1_${ik}$ )
                    ap( kk ) = akk*bkk**2_${ik}$
                 end do
              else
                 ! compute l**h *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1_${ik}$
                 do j = 1, n
                    j1j1 = jj + n - j + 1_${ik}$
                    ! compute the j-th column of the lower triangle of a
                    ajj = real( ap( jj ),KIND=${ck}$)
                    bjj = real( bp( jj ),KIND=${ck}$)
                    ap( jj ) = ajj*bjj + stdlib${ii}$_${ci}$dotc( n-j, ap( jj+1 ), 1_${ik}$,bp( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$dscal( n-j, bjj, ap( jj+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$hpmv( uplo, n-j, cone, ap( j1j1 ), bp( jj+1 ), 1_${ik}$,cone, ap( jj+1 )&
                              , 1_${ik}$ )
                    call stdlib${ii}$_${ci}$tpmv( uplo, 'CONJUGATE TRANSPOSE', 'NON-UNIT',n-j+1, bp( jj ), &
                              ap( jj ), 1_${ik}$ )
                    jj = j1j1
                 end do
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ci}$hpgst

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_chbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, rwork,&
     !! CHBGST reduces a complex Hermitian-definite banded generalized
     !! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     !! such that C has the same bandwidth as A.
     !! B must have been previously factorized as S**H*S by CPBSTF, using a
     !! split Cholesky factorization. A is overwritten by C = X**H*A*X, where
     !! X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the
     !! bandwidth of A.
                info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ka, kb, ldab, ldbb, ldx, n
           ! Array Arguments 
           real(sp), intent(out) :: rwork(*)
           complex(sp), intent(inout) :: ab(ldab,*)
           complex(sp), intent(in) :: bb(ldbb,*)
           complex(sp), intent(out) :: work(*), x(ldx,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: update, upper, wantx
           integer(${ik}$) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(sp) :: bii
           complex(sp) :: ra, ra1, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantx = stdlib_lsame( vect, 'V' )
           upper = stdlib_lsame( uplo, 'U' )
           ka1 = ka + 1_${ik}$
           kb1 = kb + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ka<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kb<0_${ik}$ .or. kb>ka ) then
              info = -5_${ik}$
           else if( ldab<ka+1 ) then
              info = -7_${ik}$
           else if( ldbb<kb+1 ) then
              info = -9_${ik}$
           else if( ldx<1_${ik}$ .or. wantx .and. ldx<max( 1_${ik}$, n ) ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CHBGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if( wantx )call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, x, ldx )
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib${ii}$_cpbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = ( n+kb ) / 2_${ik}$
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**h*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**h*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the real cosines and complex sines of the rotations are stored in
           ! the arrays rwork and work, those of the 1st set in elements
           ! 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1_${ik}$
           10 continue
           if( update ) then
              i = i - 1_${ik}$
              kbt = min( kb, i-1 )
              i0 = i - 1_${ik}$
              i1 = min( n, i+ka )
              i2 = i - kbt + ka1
              if( i<m+1 ) then
                 update = .false.
                 i = i + 1_${ik}$
                 i0 = m
                 if( ka==0 )go to 480
                 go to 10
              end if
           else
              i = i + ka
              if( i>n-1 )go to 480
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( kb1, i ),KIND=sp)
                 ab( ka1, i ) = ( real( ab( ka1, i ),KIND=sp) / bii ) / bii
                 do j = i + 1, i1
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do j = max( 1, i-ka ), i - 1
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*conjg( ab( k-i+ka1, &
                       i ) ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1, i ) +real( ab( ka1, i ),&
                                 KIND=sp)*bb( j-i+kb1, i )*conjg( bb( k-i+kb1, i ) )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1,&
                                  i )
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_csscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_cgerc( n-m, kbt, -cone, x( m+1, i ), 1_${ik}$,bb( kb1-kbt, i )&
                              , 1_${ik}$, x( m+1, i-kbt ),ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+ka1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib${ii}$_clartg( ab( k+1, i-k+ka ), ra1,rwork( i-k+ka-m ), work( i-k+ka-&
                                 m ), ra )
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb( kb1-k, i )*ra1
                       work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( 1_${ik}$, i-k+ka &
                                 )
                       ab( 1_${ik}$, i-k+ka ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = rwork( j-m )*ab( 1_${ik}$, j+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_clargv( nrt, ab( 1_${ik}$, j2t ), inca, work( j2t-m ), ka1,rwork(&
                            j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, &
                                 rwork( j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              rwork( j2-m ),work( j2-m ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_crot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j-m ), &
                                 conjg( work( j-m ) ) )
                    end do
                 end if
              end do loop_130
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kb1-kbt, i )*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), &
                              inca, rwork( j2-ka ),work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    rwork( j ) = rwork( j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = rwork( j )*ab( 1_${ik}$, j+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_clargv( nr, ab( 1_${ik}$, j2 ), inca, work( j2 ), ka1,rwork( j2 ), ka1 )
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, &
                                 rwork( j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              rwork( j2 ),work( j2 ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_crot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j ), conjg( &
                                 work( j ) ) )
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, j2 + ka, -1
                    rwork( j-m ) = rwork( j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( 1_${ik}$, i ),KIND=sp)
                 ab( 1_${ik}$, i ) = ( real( ab( 1_${ik}$, i ),KIND=sp) / bii ) / bii
                 do j = i + 1, i1
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do j = max( 1, i-ka ), i - 1
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*conjg( ab( i-k+1,k ) ) - &
                       conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + real( ab( 1_${ik}$, i ),KIND=sp)*bb( i-j+&
                                 1_${ik}$, j )*conjg( bb( i-k+1,k ) )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( k-j+1, j ) = ab( k-j+1, j ) -conjg( bb( i-k+1, k ) )*ab( i-j+1, j )
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_csscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_cgeru( n-m, kbt, -cone, x( m+1, i ), 1_${ik}$,bb( kbt+1, i-&
                              kbt ), ldbb-1,x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib${ii}$_clartg( ab( ka1-k, i ), ra1, rwork( i-k+ka-m ),work( i-k+ka-m )&
                                 , ra )
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb( k+1, i-k )*ra1
                       work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( ka1, i-k )
                                 
                       ab( ka1, i-k ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( ka1, i-k )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = rwork( j-m )*ab( ka1, j-ka+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_clargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, &
                           rwork( j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(&
                                  j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, rwork( &
                              j2-m ), work( j2-m ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_crot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j-m ), work(&
                                  j-m ) )
                    end do
                 end if
              end do loop_360
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-&
                              ka+1 ), inca,rwork( j2-ka ), work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    rwork( j ) = rwork( j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = rwork( j )*ab( ka1, j-ka+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_clargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,rwork( j2 ), &
                              ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(&
                                  j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, rwork( &
                              j2 ), work( j2 ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_crot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, j2 + ka, -1
                    rwork( j-m ) = rwork( j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           end if
           go to 10
           480 continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0_${ik}$
           490 continue
           if( update ) then
              i = i + 1_${ik}$
              kbt = min( kb, m-i )
              i0 = i + 1_${ik}$
              i1 = max( 1_${ik}$, i-ka )
              i2 = i + kbt - ka1
              if( i>m ) then
                 update = .false.
                 i = i - 1_${ik}$
                 i0 = m + 1_${ik}$
                 if( ka==0 )return
                 go to 490
              end if
           else
              i = i - ka
              if( i<2 )return
           end if
           if( i<m-kbt ) then
              nx = m
           else
              nx = n
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( kb1, i ),KIND=sp)
                 ab( ka1, i ) = ( real( ab( ka1, i ),KIND=sp) / bii ) / bii
                 do j = i1, i - 1
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do j = i + 1, min( n, i+ka )
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-j+kb1, j )*conjg( ab( i-k+ka1, &
                       k ) ) -conjg( bb( i-k+kb1, k ) )*ab( i-j+ka1, j ) +real( ab( ka1, i ),&
                                 KIND=sp)*bb( i-j+kb1, j )*conjg( bb( i-k+kb1, k ) )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -conjg( bb( i-k+kb1, k ) )*ab( i-j+ka1,&
                                  j )
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( i-k+kb1, k )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_csscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_cgeru( nx, kbt, -cone, x( 1_${ik}$, i ), 1_${ik}$,bb( kb, i+1 ), &
                              ldbb-1, x( 1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+ka1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib${ii}$_clartg( ab( k+1, i ), ra1, rwork( i+k-ka ),work( i+k-ka ), ra )
                                 
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( kb1-k, i+k )*ra1
                       work( m-kb+i+k ) = rwork( i+k-ka )*t -conjg( work( i+k-ka ) )*ab( 1_${ik}$, i+k )
                                 
                       ab( 1_${ik}$, i+k ) = work( i+k-ka )*t +rwork( i+k-ka )*ab( 1_${ik}$, i+k )
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = rwork( j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_clargv( nrt, ab( 1_${ik}$, j1+ka ), inca, work( j1 ), ka1,rwork( &
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 rwork( j1 ),work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              rwork( j1 ), work( j1 ),ka1 )
                    call stdlib${ii}$_clacgv( nr, work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               rwork( j1t ),work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_crot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_610
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),&
                               inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    rwork( m-kb+j ) = rwork( m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = rwork( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_clargv( nr, ab( 1_${ik}$, j1+ka ), inca, work( m-kb+j1 ),ka1, rwork( m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                              rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_crot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               rwork( j1t ),work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, i2 - ka
                    rwork( j ) = rwork( j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( 1_${ik}$, i ),KIND=sp)
                 ab( 1_${ik}$, i ) = ( real( ab( 1_${ik}$, i ),KIND=sp) / bii ) / bii
                 do j = i1, i - 1
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do j = i + 1, min( n, i+ka )
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*conjg( ab( k-i+1,i ) ) - &
                       conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + real( ab( 1_${ik}$, i ),KIND=sp)*bb( j-i+&
                                 1_${ik}$, i )*conjg( bb( k-i+1,i ) )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( j-k+1, k ) = ab( j-k+1, k ) -conjg( bb( k-i+1, i ) )*ab( j-i+1, i )
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_csscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_cgerc( nx, kbt, -cone, x( 1_${ik}$, i ), 1_${ik}$, bb( 2_${ik}$, i ),1_${ik}$, x( &
                              1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib${ii}$_clartg( ab( ka1-k, i+k-ka ), ra1,rwork( i+k-ka ), work( i+k-ka &
                                 ), ra )
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( k+1, i )*ra1
                       work( m-kb+i+k ) = rwork( i+k-ka )*t -conjg( work( i+k-ka ) )*ab( ka1, i+k-&
                                 ka )
                       ab( ka1, i+k-ka ) = work( i+k-ka )*t +rwork( i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = rwork( j )*ab( ka1, j-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_clargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,rwork( &
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( &
                                 j1 ), work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, rwork(&
                               j1 ),work( j1 ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_crot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( j ), conjg( work(&
                                  j ) ) )
                    end do
                 end if
              end do loop_840
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, &
                              j1t+l-1 ), inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    rwork( m-kb+j ) = rwork( m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = rwork( m-kb+j )*ab( ka1, j-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_clargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, rwork( m-kb+&
                              j1 ), ka1 )
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_clartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( &
                                 m-kb+j1 ), work( m-kb+j1 ),ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_clar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, rwork(&
                               m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    call stdlib${ii}$_clacgv( nr, work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_crot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( m-kb+j ), conjg( &
                                 work( m-kb+j ) ) )
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, i2 - ka
                    rwork( j ) = rwork( j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           end if
           go to 490
     end subroutine stdlib${ii}$_chbgst

     pure module subroutine stdlib${ii}$_zhbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, rwork,&
     !! ZHBGST reduces a complex Hermitian-definite banded generalized
     !! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     !! such that C has the same bandwidth as A.
     !! B must have been previously factorized as S**H*S by ZPBSTF, using a
     !! split Cholesky factorization. A is overwritten by C = X**H*A*X, where
     !! X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the
     !! bandwidth of A.
                info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ka, kb, ldab, ldbb, ldx, n
           ! Array Arguments 
           real(dp), intent(out) :: rwork(*)
           complex(dp), intent(inout) :: ab(ldab,*)
           complex(dp), intent(in) :: bb(ldbb,*)
           complex(dp), intent(out) :: work(*), x(ldx,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: update, upper, wantx
           integer(${ik}$) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(dp) :: bii
           complex(dp) :: ra, ra1, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantx = stdlib_lsame( vect, 'V' )
           upper = stdlib_lsame( uplo, 'U' )
           ka1 = ka + 1_${ik}$
           kb1 = kb + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ka<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kb<0_${ik}$ .or. kb>ka ) then
              info = -5_${ik}$
           else if( ldab<ka+1 ) then
              info = -7_${ik}$
           else if( ldbb<kb+1 ) then
              info = -9_${ik}$
           else if( ldx<1_${ik}$ .or. wantx .and. ldx<max( 1_${ik}$, n ) ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHBGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if( wantx )call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, x, ldx )
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib${ii}$_zpbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = ( n+kb ) / 2_${ik}$
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**h*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**h*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the real cosines and complex sines of the rotations are stored in
           ! the arrays rwork and work, those of the 1st set in elements
           ! 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1_${ik}$
           10 continue
           if( update ) then
              i = i - 1_${ik}$
              kbt = min( kb, i-1 )
              i0 = i - 1_${ik}$
              i1 = min( n, i+ka )
              i2 = i - kbt + ka1
              if( i<m+1 ) then
                 update = .false.
                 i = i + 1_${ik}$
                 i0 = m
                 if( ka==0 )go to 480
                 go to 10
              end if
           else
              i = i + ka
              if( i>n-1 )go to 480
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( kb1, i ),KIND=dp)
                 ab( ka1, i ) = ( real( ab( ka1, i ),KIND=dp) / bii ) / bii
                 do j = i + 1, i1
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do j = max( 1, i-ka ), i - 1
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*conjg( ab( k-i+ka1, &
                       i ) ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1, i ) +real( ab( ka1, i ),&
                                 KIND=dp)*bb( j-i+kb1, i )*conjg( bb( k-i+kb1, i ) )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1,&
                                  i )
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_zdscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_zgerc( n-m, kbt, -cone, x( m+1, i ), 1_${ik}$,bb( kb1-kbt, i )&
                              , 1_${ik}$, x( m+1, i-kbt ),ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+ka1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib${ii}$_zlartg( ab( k+1, i-k+ka ), ra1,rwork( i-k+ka-m ), work( i-k+ka-&
                                 m ), ra )
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb( kb1-k, i )*ra1
                       work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( 1_${ik}$, i-k+ka &
                                 )
                       ab( 1_${ik}$, i-k+ka ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = rwork( j-m )*ab( 1_${ik}$, j+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_zlargv( nrt, ab( 1_${ik}$, j2t ), inca, work( j2t-m ), ka1,rwork(&
                            j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, &
                                 rwork( j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              rwork( j2-m ),work( j2-m ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_zrot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j-m ), &
                                 conjg( work( j-m ) ) )
                    end do
                 end if
              end do loop_130
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kb1-kbt, i )*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), &
                              inca, rwork( j2-ka ),work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    rwork( j ) = rwork( j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = rwork( j )*ab( 1_${ik}$, j+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_zlargv( nr, ab( 1_${ik}$, j2 ), inca, work( j2 ), ka1,rwork( j2 ), ka1 )
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, &
                                 rwork( j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              rwork( j2 ),work( j2 ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_zrot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j ), conjg( &
                                 work( j ) ) )
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, j2 + ka, -1
                    rwork( j-m ) = rwork( j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( 1_${ik}$, i ),KIND=dp)
                 ab( 1_${ik}$, i ) = ( real( ab( 1_${ik}$, i ),KIND=dp) / bii ) / bii
                 do j = i + 1, i1
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do j = max( 1, i-ka ), i - 1
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*conjg( ab( i-k+1,k ) ) - &
                       conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + real( ab( 1_${ik}$, i ),KIND=dp)*bb( i-j+&
                                 1_${ik}$, j )*conjg( bb( i-k+1,k ) )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( k-j+1, j ) = ab( k-j+1, j ) -conjg( bb( i-k+1, k ) )*ab( i-j+1, j )
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_zdscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_zgeru( n-m, kbt, -cone, x( m+1, i ), 1_${ik}$,bb( kbt+1, i-&
                              kbt ), ldbb-1,x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib${ii}$_zlartg( ab( ka1-k, i ), ra1, rwork( i-k+ka-m ),work( i-k+ka-m )&
                                 , ra )
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb( k+1, i-k )*ra1
                       work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( ka1, i-k )
                                 
                       ab( ka1, i-k ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( ka1, i-k )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = rwork( j-m )*ab( ka1, j-ka+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_zlargv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, &
                           rwork( j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(&
                                  j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, rwork( &
                              j2-m ), work( j2-m ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_zrot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j-m ), work(&
                                  j-m ) )
                    end do
                 end if
              end do loop_360
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-&
                              ka+1 ), inca,rwork( j2-ka ), work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    rwork( j ) = rwork( j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = rwork( j )*ab( ka1, j-ka+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_zlargv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,rwork( j2 ), &
                              ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(&
                                  j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, rwork( &
                              j2 ), work( j2 ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_zrot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, j2 + ka, -1
                    rwork( j-m ) = rwork( j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           end if
           go to 10
           480 continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0_${ik}$
           490 continue
           if( update ) then
              i = i + 1_${ik}$
              kbt = min( kb, m-i )
              i0 = i + 1_${ik}$
              i1 = max( 1_${ik}$, i-ka )
              i2 = i + kbt - ka1
              if( i>m ) then
                 update = .false.
                 i = i - 1_${ik}$
                 i0 = m + 1_${ik}$
                 if( ka==0 )return
                 go to 490
              end if
           else
              i = i - ka
              if( i<2 )return
           end if
           if( i<m-kbt ) then
              nx = m
           else
              nx = n
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( kb1, i ),KIND=dp)
                 ab( ka1, i ) = ( real( ab( ka1, i ),KIND=dp) / bii ) / bii
                 do j = i1, i - 1
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do j = i + 1, min( n, i+ka )
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-j+kb1, j )*conjg( ab( i-k+ka1, &
                       k ) ) -conjg( bb( i-k+kb1, k ) )*ab( i-j+ka1, j ) +real( ab( ka1, i ),&
                                 KIND=dp)*bb( i-j+kb1, j )*conjg( bb( i-k+kb1, k ) )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -conjg( bb( i-k+kb1, k ) )*ab( i-j+ka1,&
                                  j )
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( i-k+kb1, k )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_zdscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_zgeru( nx, kbt, -cone, x( 1_${ik}$, i ), 1_${ik}$,bb( kb, i+1 ), &
                              ldbb-1, x( 1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+ka1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib${ii}$_zlartg( ab( k+1, i ), ra1, rwork( i+k-ka ),work( i+k-ka ), ra )
                                 
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( kb1-k, i+k )*ra1
                       work( m-kb+i+k ) = rwork( i+k-ka )*t -conjg( work( i+k-ka ) )*ab( 1_${ik}$, i+k )
                                 
                       ab( 1_${ik}$, i+k ) = work( i+k-ka )*t +rwork( i+k-ka )*ab( 1_${ik}$, i+k )
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = rwork( j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_zlargv( nrt, ab( 1_${ik}$, j1+ka ), inca, work( j1 ), ka1,rwork( &
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 rwork( j1 ),work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              rwork( j1 ), work( j1 ),ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               rwork( j1t ),work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_zrot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_610
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),&
                               inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    rwork( m-kb+j ) = rwork( m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = rwork( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_zlargv( nr, ab( 1_${ik}$, j1+ka ), inca, work( m-kb+j1 ),ka1, rwork( m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                              rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_zrot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               rwork( j1t ),work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, i2 - ka
                    rwork( j ) = rwork( j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( 1_${ik}$, i ),KIND=dp)
                 ab( 1_${ik}$, i ) = ( real( ab( 1_${ik}$, i ),KIND=dp) / bii ) / bii
                 do j = i1, i - 1
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do j = i + 1, min( n, i+ka )
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*conjg( ab( k-i+1,i ) ) - &
                       conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + real( ab( 1_${ik}$, i ),KIND=dp)*bb( j-i+&
                                 1_${ik}$, i )*conjg( bb( k-i+1,i ) )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( j-k+1, k ) = ab( j-k+1, k ) -conjg( bb( k-i+1, i ) )*ab( j-i+1, i )
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_zdscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_zgerc( nx, kbt, -cone, x( 1_${ik}$, i ), 1_${ik}$, bb( 2_${ik}$, i ),1_${ik}$, x( &
                              1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib${ii}$_zlartg( ab( ka1-k, i+k-ka ), ra1,rwork( i+k-ka ), work( i+k-ka &
                                 ), ra )
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( k+1, i )*ra1
                       work( m-kb+i+k ) = rwork( i+k-ka )*t -conjg( work( i+k-ka ) )*ab( ka1, i+k-&
                                 ka )
                       ab( ka1, i+k-ka ) = work( i+k-ka )*t +rwork( i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = rwork( j )*ab( ka1, j-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_zlargv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,rwork( &
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( &
                                 j1 ), work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, rwork(&
                               j1 ),work( j1 ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_zrot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( j ), conjg( work(&
                                  j ) ) )
                    end do
                 end if
              end do loop_840
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, &
                              j1t+l-1 ), inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    rwork( m-kb+j ) = rwork( m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = rwork( m-kb+j )*ab( ka1, j-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_zlargv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, rwork( m-kb+&
                              j1 ), ka1 )
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_zlartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( &
                                 m-kb+j1 ), work( m-kb+j1 ),ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_zlar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, rwork(&
                               m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    call stdlib${ii}$_zlacgv( nr, work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_zrot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( m-kb+j ), conjg( &
                                 work( m-kb+j ) ) )
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, i2 - ka
                    rwork( j ) = rwork( j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           end if
           go to 490
     end subroutine stdlib${ii}$_zhbgst

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$hbgst( vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x,ldx, work, rwork,&
     !! ZHBGST: reduces a complex Hermitian-definite banded generalized
     !! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     !! such that C has the same bandwidth as A.
     !! B must have been previously factorized as S**H*S by ZPBSTF, using a
     !! split Cholesky factorization. A is overwritten by C = X**H*A*X, where
     !! X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the
     !! bandwidth of A.
                info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ka, kb, ldab, ldbb, ldx, n
           ! Array Arguments 
           real(${ck}$), intent(out) :: rwork(*)
           complex(${ck}$), intent(inout) :: ab(ldab,*)
           complex(${ck}$), intent(in) :: bb(ldbb,*)
           complex(${ck}$), intent(out) :: work(*), x(ldx,*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: update, upper, wantx
           integer(${ik}$) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(${ck}$) :: bii
           complex(${ck}$) :: ra, ra1, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantx = stdlib_lsame( vect, 'V' )
           upper = stdlib_lsame( uplo, 'U' )
           ka1 = ka + 1_${ik}$
           kb1 = kb + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantx .and. .not.stdlib_lsame( vect, 'N' ) ) then
              info = -1_${ik}$
           else if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ka<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kb<0_${ik}$ .or. kb>ka ) then
              info = -5_${ik}$
           else if( ldab<ka+1 ) then
              info = -7_${ik}$
           else if( ldbb<kb+1 ) then
              info = -9_${ik}$
           else if( ldx<1_${ik}$ .or. wantx .and. ldx<max( 1_${ik}$, n ) ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZHBGST', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if( wantx )call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, x, ldx )
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib${ii}$_${ci}$pbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = ( n+kb ) / 2_${ik}$
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**h*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**h*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the real cosines and complex sines of the rotations are stored in
           ! the arrays rwork and work, those of the 1st set in elements
           ! 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1_${ik}$
           10 continue
           if( update ) then
              i = i - 1_${ik}$
              kbt = min( kb, i-1 )
              i0 = i - 1_${ik}$
              i1 = min( n, i+ka )
              i2 = i - kbt + ka1
              if( i<m+1 ) then
                 update = .false.
                 i = i + 1_${ik}$
                 i0 = m
                 if( ka==0 )go to 480
                 go to 10
              end if
           else
              i = i + ka
              if( i>n-1 )go to 480
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( kb1, i ),KIND=${ck}$)
                 ab( ka1, i ) = ( real( ab( ka1, i ),KIND=${ck}$) / bii ) / bii
                 do j = i + 1, i1
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do j = max( 1, i-ka ), i - 1
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( j-i+kb1, i )*conjg( ab( k-i+ka1, &
                       i ) ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1, i ) +real( ab( ka1, i ),&
                                 KIND=${ck}$)*bb( j-i+kb1, i )*conjg( bb( k-i+kb1, i ) )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -conjg( bb( k-i+kb1, i ) )*ab( j-i+ka1,&
                                  i )
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( k-i+kb1, i )*ab( i-j+ka1, j )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ci}$dscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ci}$gerc( n-m, kbt, -cone, x( m+1, i ), 1_${ik}$,bb( kb1-kbt, i )&
                              , 1_${ik}$, x( m+1, i-kbt ),ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+ka1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib${ii}$_${ci}$lartg( ab( k+1, i-k+ka ), ra1,rwork( i-k+ka-m ), work( i-k+ka-&
                                 m ), ra )
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb( kb1-k, i )*ra1
                       work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( 1_${ik}$, i-k+ka &
                                 )
                       ab( 1_${ik}$, i-k+ka ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( 1_${ik}$, i-k+ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = rwork( j-m )*ab( 1_${ik}$, j+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nrt, ab( 1_${ik}$, j2t ), inca, work( j2t-m ), ka1,rwork(&
                            j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, &
                                 rwork( j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              rwork( j2-m ),work( j2-m ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ci}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j-m ), &
                                 conjg( work( j-m ) ) )
                    end do
                 end if
              end do loop_130
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kb1-kbt, i )*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j2-l+1 ), inca,ab( l+1, j2-l+1 ), &
                              inca, rwork( j2-ka ),work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    rwork( j ) = rwork( j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+1 )
                    ab( 1_${ik}$, j+1 ) = rwork( j )*ab( 1_${ik}$, j+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ci}$largv( nr, ab( 1_${ik}$, j2 ), inca, work( j2 ), ka1,rwork( j2 ), ka1 )
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( ka1-l, j2 ), inca,ab( ka-l, j2+1 ), inca, &
                                 rwork( j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( ka1, j2 ), ab( ka1, j2+1 ),ab( ka, j2+1 ), inca, &
                              rwork( j2 ),work( j2 ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ci}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j ), conjg( &
                                 work( j ) ) )
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j2+ka1-l ), inca,ab( l+1, j2+ka1-l &
                              ), inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, j2 + ka, -1
                    rwork( j-m ) = rwork( j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( 1_${ik}$, i ),KIND=${ck}$)
                 ab( 1_${ik}$, i ) = ( real( ab( 1_${ik}$, i ),KIND=${ck}$) / bii ) / bii
                 do j = i + 1, i1
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do j = max( 1, i-ka ), i - 1
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( i-j+1, j )*conjg( ab( i-k+1,k ) ) - &
                       conjg( bb( i-k+1, k ) )*ab( i-j+1, j ) + real( ab( 1_${ik}$, i ),KIND=${ck}$)*bb( i-j+&
                                 1_${ik}$, j )*conjg( bb( i-k+1,k ) )
                    end do
                    do j = max( 1, i-ka ), i - kbt - 1
                       ab( k-j+1, j ) = ab( k-j+1, j ) -conjg( bb( i-k+1, k ) )*ab( i-j+1, j )
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max( j-ka, i-kbt ), i - 1
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( i-k+1, k )*ab( j-i+1, i )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ci}$dscal( n-m, one / bii, x( m+1, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ci}$geru( n-m, kbt, -cone, x( m+1, i ), 1_${ik}$,bb( kbt+1, i-&
                              kbt ), ldbb-1,x( m+1, i-kbt ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i-k+ka<n .and. i-k>1_${ik}$ ) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib${ii}$_${ci}$lartg( ab( ka1-k, i ), ra1, rwork( i-k+ka-m ),work( i-k+ka-m )&
                                 , ra )
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb( k+1, i-k )*ra1
                       work( i-k ) = rwork( i-k+ka-m )*t -conjg( work( i-k+ka-m ) )*ab( ka1, i-k )
                                 
                       ab( ka1, i-k ) = work( i-k+ka-m )*t +rwork( i-k+ka-m )*ab( ka1, i-k )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( update ) then
                    j2t = max( j2, i+2*ka-k+1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( n-j2t+ka ) / ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work( j-m ) = work( j-m )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = rwork( j-m )*ab( ka1, j-ka+1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nrt, ab( ka1, j2t-ka ), inca, work( j2t-m ),ka1, &
                           rwork( j2t-m ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(&
                                  j2-m ),work( j2-m ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, rwork( &
                              j2-m ), work( j2-m ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( j2-m ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ci}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j-m ), work(&
                                  j-m ) )
                    end do
                 end if
              end do loop_360
              if( update ) then
                 if( i2<=n .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work( i-kbt ) = -bb( kbt+1, i-kbt )*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if( update ) then
                    j2 = i - k - 1_${ik}$ + max( 2_${ik}$, k-i0+1 )*ka1
                 else
                    j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+ka+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j2-ka ), inca,ab( ka1-l, j2-&
                              ka+1 ), inca,rwork( j2-ka ), work( j2-ka ), ka1 )
                 end do
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 do j = j1, j2, -ka1
                    work( j ) = work( j-ka )
                    rwork( j ) = rwork( j-ka )
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-ka+1 )
                    ab( ka1, j-ka+1 ) = rwork( j )*ab( ka1, j-ka+1 )
                 end do
                 if( update ) then
                    if( i-k<n-ka .and. k<=kbt )work( i-k+ka ) = work( i-k )
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+1 )*ka1
                 nr = ( n-j2+ka ) / ka1
                 j1 = j2 + ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ci}$largv( nr, ab( ka1, j2-ka ), inca, work( j2 ), ka1,rwork( j2 ), &
                              ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( l+1, j2-l ), inca,ab( l+2, j2-l ), inca, rwork(&
                                  j2 ),work( j2 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( 1_${ik}$, j2 ), ab( 1_${ik}$, j2+1 ), ab( 2_${ik}$, j2 ),inca, rwork( &
                              j2 ), work( j2 ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( j2 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2 ),work( j2 ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib${ii}$_${ci}$rot( n-m, x( m+1, j ), 1_${ik}$, x( m+1, j+1 ), 1_${ik}$,rwork( j ), work( &
                                 j ) )
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1_${ik}$ + max( 1_${ik}$, k-i0+2 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( n-j2+l ) / ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j2 ), inca,ab( ka1-l, j2+1 ),&
                               inca, rwork( j2-m ),work( j2-m ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = n - 1, j2 + ka, -1
                    rwork( j-m ) = rwork( j-ka-m )
                    work( j-m ) = work( j-ka-m )
                 end do
              end if
           end if
           go to 10
           480 continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0_${ik}$
           490 continue
           if( update ) then
              i = i + 1_${ik}$
              kbt = min( kb, m-i )
              i0 = i + 1_${ik}$
              i1 = max( 1_${ik}$, i-ka )
              i2 = i + kbt - ka1
              if( i>m ) then
                 update = .false.
                 i = i - 1_${ik}$
                 i0 = m + 1_${ik}$
                 if( ka==0 )return
                 go to 490
              end if
           else
              i = i - ka
              if( i<2 )return
           end if
           if( i<m-kbt ) then
              nx = m
           else
              nx = n
           end if
           if( upper ) then
              ! transform a, working with the upper triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( kb1, i ),KIND=${ck}$)
                 ab( ka1, i ) = ( real( ab( ka1, i ),KIND=${ck}$) / bii ) / bii
                 do j = i1, i - 1
                    ab( j-i+ka1, i ) = ab( j-i+ka1, i ) / bii
                 end do
                 do j = i + 1, min( n, i+ka )
                    ab( i-j+ka1, j ) = ab( i-j+ka1, j ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -bb( i-j+kb1, j )*conjg( ab( i-k+ka1, &
                       k ) ) -conjg( bb( i-k+kb1, k ) )*ab( i-j+ka1, j ) +real( ab( ka1, i ),&
                                 KIND=${ck}$)*bb( i-j+kb1, j )*conjg( bb( i-k+kb1, k ) )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( k-j+ka1, j ) = ab( k-j+ka1, j ) -conjg( bb( i-k+kb1, k ) )*ab( i-j+ka1,&
                                  j )
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( j-k+ka1, k ) = ab( j-k+ka1, k ) -bb( i-k+kb1, k )*ab( j-i+ka1, i )
                                 
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ci}$dscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ci}$geru( nx, kbt, -cone, x( 1_${ik}$, i ), 1_${ik}$,bb( kb, i+1 ), &
                              ldbb-1, x( 1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab( i1-i+ka1, i )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib${ii}$_${ci}$lartg( ab( k+1, i ), ra1, rwork( i+k-ka ),work( i+k-ka ), ra )
                                 
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( kb1-k, i+k )*ra1
                       work( m-kb+i+k ) = rwork( i+k-ka )*t -conjg( work( i+k-ka ) )*ab( 1_${ik}$, i+k )
                                 
                       ab( 1_${ik}$, i+k ) = work( i+k-ka )*t +rwork( i+k-ka )*ab( 1_${ik}$, i+k )
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = rwork( j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nrt, ab( 1_${ik}$, j1+ka ), inca, work( j1 ), ka1,rwork( &
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 rwork( j1 ),work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              rwork( j1 ), work( j1 ),ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               rwork( j1t ),work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ci}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( j ), work( j ) )
                                 
                    end do
                 end if
              end do loop_610
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kb1-kbt, i+kbt )*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j1t+ka ), inca,ab( l+1, j1t+ka-1 ),&
                               inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    rwork( m-kb+j ) = rwork( m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                    ab( 1_${ik}$, j+ka-1 ) = rwork( m-kb+j )*ab( 1_${ik}$, j+ka-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ci}$largv( nr, ab( 1_${ik}$, j1+ka ), inca, work( m-kb+j1 ),ka1, rwork( m-&
                              kb+j1 ), ka1 )
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( ka1-l, j1+l ), inca,ab( ka-l, j1+l ), inca, &
                                 rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( ka1, j1 ), ab( ka1, j1-1 ),ab( ka, j1 ), inca, &
                              rwork( m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                              rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ci}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( m-kb+j ), work( &
                                 m-kb+j ) )
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l, j1t ), inca,ab( l+1, j1t-1 ), inca,&
                               rwork( j1t ),work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, i2 - ka
                    rwork( j ) = rwork( j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if( update ) then
                 ! form  inv(s(i))**h * a * inv(s(i))
                 bii = real( bb( 1_${ik}$, i ),KIND=${ck}$)
                 ab( 1_${ik}$, i ) = ( real( ab( 1_${ik}$, i ),KIND=${ck}$) / bii ) / bii
                 do j = i1, i - 1
                    ab( i-j+1, j ) = ab( i-j+1, j ) / bii
                 end do
                 do j = i + 1, min( n, i+ka )
                    ab( j-i+1, i ) = ab( j-i+1, i ) / bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab( j-k+1, k ) = ab( j-k+1, k ) -bb( j-i+1, i )*conjg( ab( k-i+1,i ) ) - &
                       conjg( bb( k-i+1, i ) )*ab( j-i+1, i ) + real( ab( 1_${ik}$, i ),KIND=${ck}$)*bb( j-i+&
                                 1_${ik}$, i )*conjg( bb( k-i+1,i ) )
                    end do
                    do j = i + kbt + 1, min( n, i+ka )
                       ab( j-k+1, k ) = ab( j-k+1, k ) -conjg( bb( k-i+1, i ) )*ab( j-i+1, i )
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min( j+ka, i+kbt )
                       ab( k-j+1, j ) = ab( k-j+1, j ) -bb( k-i+1, i )*ab( i-j+1, j )
                    end do
                 end do
                 if( wantx ) then
                    ! post-multiply x by inv(s(i))
                    call stdlib${ii}$_${ci}$dscal( nx, one / bii, x( 1_${ik}$, i ), 1_${ik}$ )
                    if( kbt>0_${ik}$ )call stdlib${ii}$_${ci}$gerc( nx, kbt, -cone, x( 1_${ik}$, i ), 1_${ik}$, bb( 2_${ik}$, i ),1_${ik}$, x( &
                              1_${ik}$, i+1 ), ldx )
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab( i-i1+1, i1 )
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if( update ) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if( i+k-ka1>0_${ik}$ .and. i+k<m ) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib${ii}$_${ci}$lartg( ab( ka1-k, i+k-ka ), ra1,rwork( i+k-ka ), work( i+k-ka &
                                 ), ra )
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb( k+1, i )*ra1
                       work( m-kb+i+k ) = rwork( i+k-ka )*t -conjg( work( i+k-ka ) )*ab( ka1, i+k-&
                                 ka )
                       ab( ka1, i+k-ka ) = work( i+k-ka )*t +rwork( i+k-ka )*ab( ka1, i+k-ka )
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( update ) then
                    j2t = min( j2, i-2*ka+k-1 )
                 else
                    j2t = j2
                 end if
                 nrt = ( j2t+ka-1 ) / ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work( j ) = work( j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = rwork( j )*ab( ka1, j-1 )
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nrt, ab( ka1, j1 ), inca, work( j1 ), ka1,rwork( &
                           j1 ), ka1 )
                 if( nr>0_${ik}$ ) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( &
                                 j1 ), work( j1 ), ka1 )
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, rwork(&
                               j1 ),work( j1 ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( j1 ), ka1 )
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ci}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( j ), conjg( work(&
                                  j ) ) )
                    end do
                 end if
              end do loop_840
              if( update ) then
                 if( i2>0_${ik}$ .and. kbt>0_${ik}$ ) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work( m-kb+i+kbt ) = -bb( kbt+1, i )*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if( update ) then
                    j2 = i + k + 1_${ik}$ - max( 2_${ik}$, k+i0-m )*ka1
                 else
                    j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+ka+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j1t+l-1 ), inca,ab( ka1-l, &
                              j1t+l-1 ), inca,rwork( m-kb+j1t+ka ),work( m-kb+j1t+ka ), ka1 )
                 end do
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 do j = j1, j2, ka1
                    work( m-kb+j ) = work( m-kb+j+ka )
                    rwork( m-kb+j ) = rwork( m-kb+j+ka )
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work( m-kb+j ) = work( m-kb+j )*ab( ka1, j-1 )
                    ab( ka1, j-1 ) = rwork( m-kb+j )*ab( ka1, j-1 )
                 end do
                 if( update ) then
                    if( i+k>ka1 .and. k<=kbt )work( m-kb+i+k-ka ) = work( m-kb+i+k )
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m )*ka1
                 nr = ( j2+ka-1 ) / ka1
                 j1 = j2 - ( nr-1 )*ka1
                 if( nr>0_${ik}$ ) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib${ii}$_${ci}$largv( nr, ab( ka1, j1 ), inca, work( m-kb+j1 ),ka1, rwork( m-kb+&
                              j1 ), ka1 )
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib${ii}$_${ci}$lartv( nr, ab( l+1, j1 ), inca, ab( l+2, j1-1 ),inca, rwork( &
                                 m-kb+j1 ), work( m-kb+j1 ),ka1 )
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib${ii}$_${ci}$lar2v( nr, ab( 1_${ik}$, j1 ), ab( 1_${ik}$, j1-1 ),ab( 2_${ik}$, j1-1 ), inca, rwork(&
                               m-kb+j1 ),work( m-kb+j1 ), ka1 )
                    call stdlib${ii}$_${ci}$lacgv( nr, work( m-kb+j1 ), ka1 )
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( m-kb+j1t ), work( m-kb+j1t ),ka1 )
                 end do
                 if( wantx ) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib${ii}$_${ci}$rot( nx, x( 1_${ik}$, j ), 1_${ik}$, x( 1_${ik}$, j-1 ), 1_${ik}$,rwork( m-kb+j ), conjg( &
                                 work( m-kb+j ) ) )
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1_${ik}$ - max( 1_${ik}$, k+i0-m+1 )*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = ( j2+l-1 ) / ka1
                    j1t = j2 - ( nrt-1 )*ka1
                    if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( ka1-l+1, j1t-ka1+l ), inca,ab( ka1-l, &
                              j1t-ka1+l ), inca,rwork( j1t ), work( j1t ), ka1 )
                 end do
              end do
              if( kb>1_${ik}$ ) then
                 do j = 2, i2 - ka
                    rwork( j ) = rwork( j+ka )
                    work( j ) = work( j+ka )
                 end do
              end if
           end if
           go to 490
     end subroutine stdlib${ii}$_${ci}$hbgst

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_spbstf( uplo, n, kd, ab, ldab, info )
     !! SPBSTF computes a split Cholesky factorization of a real
     !! symmetric positive definite band matrix A.
     !! This routine is designed to be used in conjunction with SSBGST.
     !! The factorization has the form  A = S**T*S  where S is a band matrix
     !! of the same bandwidth as A and the following structure:
     !! S = ( U    )
     !! ( M  L )
     !! where U is upper triangular of order m = (n+kd)/2, and L is lower
     !! triangular of order n-m.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kd, ldab, n
           ! Array Arguments 
           real(sp), intent(inout) :: ab(ldab,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, kld, km, m
           real(sp) :: ajj
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( kd<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ldab<kd+1 ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SPBSTF', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           kld = max( 1_${ik}$, ldab-1 )
           ! set the splitting point m.
           m = ( n+kd ) / 2_${ik}$
           if( upper ) then
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( kd+1, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib${ii}$_sscal( km, one / ajj, ab( kd+1-km, j ), 1_${ik}$ )
                 call stdlib${ii}$_ssyr( 'UPPER', km, -one, ab( kd+1-km, j ), 1_${ik}$,ab( kd+1, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( kd+1, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_sscal( km, one / ajj, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_ssyr( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld )
                              
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( 1_${ik}$, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib${ii}$_sscal( km, one / ajj, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_ssyr( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1_${ik}$, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( 1_${ik}$, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_sscal( km, one / ajj, ab( 2_${ik}$, j ), 1_${ik}$ )
                    call stdlib${ii}$_ssyr( 'LOWER', km, -one, ab( 2_${ik}$, j ), 1_${ik}$,ab( 1_${ik}$, j+1 ), kld )
                 end if
              end do
           end if
           return
           50 continue
           info = j
           return
     end subroutine stdlib${ii}$_spbstf

     pure module subroutine stdlib${ii}$_dpbstf( uplo, n, kd, ab, ldab, info )
     !! DPBSTF computes a split Cholesky factorization of a real
     !! symmetric positive definite band matrix A.
     !! This routine is designed to be used in conjunction with DSBGST.
     !! The factorization has the form  A = S**T*S  where S is a band matrix
     !! of the same bandwidth as A and the following structure:
     !! S = ( U    )
     !! ( M  L )
     !! where U is upper triangular of order m = (n+kd)/2, and L is lower
     !! triangular of order n-m.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kd, ldab, n
           ! Array Arguments 
           real(dp), intent(inout) :: ab(ldab,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, kld, km, m
           real(dp) :: ajj
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( kd<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ldab<kd+1 ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DPBSTF', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           kld = max( 1_${ik}$, ldab-1 )
           ! set the splitting point m.
           m = ( n+kd ) / 2_${ik}$
           if( upper ) then
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( kd+1, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib${ii}$_dscal( km, one / ajj, ab( kd+1-km, j ), 1_${ik}$ )
                 call stdlib${ii}$_dsyr( 'UPPER', km, -one, ab( kd+1-km, j ), 1_${ik}$,ab( kd+1, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( kd+1, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_dscal( km, one / ajj, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_dsyr( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld )
                              
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( 1_${ik}$, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib${ii}$_dscal( km, one / ajj, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_dsyr( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1_${ik}$, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( 1_${ik}$, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_dscal( km, one / ajj, ab( 2_${ik}$, j ), 1_${ik}$ )
                    call stdlib${ii}$_dsyr( 'LOWER', km, -one, ab( 2_${ik}$, j ), 1_${ik}$,ab( 1_${ik}$, j+1 ), kld )
                 end if
              end do
           end if
           return
           50 continue
           info = j
           return
     end subroutine stdlib${ii}$_dpbstf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$pbstf( uplo, n, kd, ab, ldab, info )
     !! DPBSTF: computes a split Cholesky factorization of a real
     !! symmetric positive definite band matrix A.
     !! This routine is designed to be used in conjunction with DSBGST.
     !! The factorization has the form  A = S**T*S  where S is a band matrix
     !! of the same bandwidth as A and the following structure:
     !! S = ( U    )
     !! ( M  L )
     !! where U is upper triangular of order m = (n+kd)/2, and L is lower
     !! triangular of order n-m.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kd, ldab, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: ab(ldab,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, kld, km, m
           real(${rk}$) :: ajj
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( kd<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ldab<kd+1 ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DPBSTF', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           kld = max( 1_${ik}$, ldab-1 )
           ! set the splitting point m.
           m = ( n+kd ) / 2_${ik}$
           if( upper ) then
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( kd+1, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib${ii}$_${ri}$scal( km, one / ajj, ab( kd+1-km, j ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$syr( 'UPPER', km, -one, ab( kd+1-km, j ), 1_${ik}$,ab( kd+1, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( kd+1, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_${ri}$scal( km, one / ajj, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_${ri}$syr( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld )
                              
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( 1_${ik}$, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib${ii}$_${ri}$scal( km, one / ajj, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_${ri}$syr( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1_${ik}$, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab( 1_${ik}$, j )
                 if( ajj<=zero )go to 50
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_${ri}$scal( km, one / ajj, ab( 2_${ik}$, j ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$syr( 'LOWER', km, -one, ab( 2_${ik}$, j ), 1_${ik}$,ab( 1_${ik}$, j+1 ), kld )
                 end if
              end do
           end if
           return
           50 continue
           info = j
           return
     end subroutine stdlib${ii}$_${ri}$pbstf

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cpbstf( uplo, n, kd, ab, ldab, info )
     !! CPBSTF computes a split Cholesky factorization of a complex
     !! Hermitian positive definite band matrix A.
     !! This routine is designed to be used in conjunction with CHBGST.
     !! The factorization has the form  A = S**H*S  where S is a band matrix
     !! of the same bandwidth as A and the following structure:
     !! S = ( U    )
     !! ( M  L )
     !! where U is upper triangular of order m = (n+kd)/2, and L is lower
     !! triangular of order n-m.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kd, ldab, n
           ! Array Arguments 
           complex(sp), intent(inout) :: ab(ldab,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, kld, km, m
           real(sp) :: ajj
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( kd<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ldab<kd+1 ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CPBSTF', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           kld = max( 1_${ik}$, ldab-1 )
           ! set the splitting point m.
           m = ( n+kd ) / 2_${ik}$
           if( upper ) then
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( kd+1, j ),KIND=sp)
                 if( ajj<=zero ) then
                    ab( kd+1, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib${ii}$_csscal( km, one / ajj, ab( kd+1-km, j ), 1_${ik}$ )
                 call stdlib${ii}$_cher( 'UPPER', km, -one, ab( kd+1-km, j ), 1_${ik}$,ab( kd+1, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( kd+1, j ),KIND=sp)
                 if( ajj<=zero ) then
                    ab( kd+1, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_csscal( km, one / ajj, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_clacgv( km, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_cher( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld )
                              
                    call stdlib${ii}$_clacgv( km, ab( kd, j+1 ), kld )
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( 1_${ik}$, j ),KIND=sp)
                 if( ajj<=zero ) then
                    ab( 1_${ik}$, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib${ii}$_csscal( km, one / ajj, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_clacgv( km, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_cher( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1_${ik}$, j-km ), kld )
                           
                 call stdlib${ii}$_clacgv( km, ab( km+1, j-km ), kld )
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( 1_${ik}$, j ),KIND=sp)
                 if( ajj<=zero ) then
                    ab( 1_${ik}$, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_csscal( km, one / ajj, ab( 2_${ik}$, j ), 1_${ik}$ )
                    call stdlib${ii}$_cher( 'LOWER', km, -one, ab( 2_${ik}$, j ), 1_${ik}$,ab( 1_${ik}$, j+1 ), kld )
                 end if
              end do
           end if
           return
           50 continue
           info = j
           return
     end subroutine stdlib${ii}$_cpbstf

     pure module subroutine stdlib${ii}$_zpbstf( uplo, n, kd, ab, ldab, info )
     !! ZPBSTF computes a split Cholesky factorization of a complex
     !! Hermitian positive definite band matrix A.
     !! This routine is designed to be used in conjunction with ZHBGST.
     !! The factorization has the form  A = S**H*S  where S is a band matrix
     !! of the same bandwidth as A and the following structure:
     !! S = ( U    )
     !! ( M  L )
     !! where U is upper triangular of order m = (n+kd)/2, and L is lower
     !! triangular of order n-m.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kd, ldab, n
           ! Array Arguments 
           complex(dp), intent(inout) :: ab(ldab,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, kld, km, m
           real(dp) :: ajj
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( kd<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ldab<kd+1 ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZPBSTF', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           kld = max( 1_${ik}$, ldab-1 )
           ! set the splitting point m.
           m = ( n+kd ) / 2_${ik}$
           if( upper ) then
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( kd+1, j ),KIND=dp)
                 if( ajj<=zero ) then
                    ab( kd+1, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib${ii}$_zdscal( km, one / ajj, ab( kd+1-km, j ), 1_${ik}$ )
                 call stdlib${ii}$_zher( 'UPPER', km, -one, ab( kd+1-km, j ), 1_${ik}$,ab( kd+1, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( kd+1, j ),KIND=dp)
                 if( ajj<=zero ) then
                    ab( kd+1, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_zdscal( km, one / ajj, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_zlacgv( km, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_zher( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld )
                              
                    call stdlib${ii}$_zlacgv( km, ab( kd, j+1 ), kld )
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( 1_${ik}$, j ),KIND=dp)
                 if( ajj<=zero ) then
                    ab( 1_${ik}$, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib${ii}$_zdscal( km, one / ajj, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_zlacgv( km, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_zher( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1_${ik}$, j-km ), kld )
                           
                 call stdlib${ii}$_zlacgv( km, ab( km+1, j-km ), kld )
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( 1_${ik}$, j ),KIND=dp)
                 if( ajj<=zero ) then
                    ab( 1_${ik}$, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_zdscal( km, one / ajj, ab( 2_${ik}$, j ), 1_${ik}$ )
                    call stdlib${ii}$_zher( 'LOWER', km, -one, ab( 2_${ik}$, j ), 1_${ik}$,ab( 1_${ik}$, j+1 ), kld )
                 end if
              end do
           end if
           return
           50 continue
           info = j
           return
     end subroutine stdlib${ii}$_zpbstf

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$pbstf( uplo, n, kd, ab, ldab, info )
     !! ZPBSTF: computes a split Cholesky factorization of a complex
     !! Hermitian positive definite band matrix A.
     !! This routine is designed to be used in conjunction with ZHBGST.
     !! The factorization has the form  A = S**H*S  where S is a band matrix
     !! of the same bandwidth as A and the following structure:
     !! S = ( U    )
     !! ( M  L )
     !! where U is upper triangular of order m = (n+kd)/2, and L is lower
     !! triangular of order n-m.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kd, ldab, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: ab(ldab,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: upper
           integer(${ik}$) :: j, kld, km, m
           real(${ck}$) :: ajj
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           upper = stdlib_lsame( uplo, 'U' )
           if( .not.upper .and. .not.stdlib_lsame( uplo, 'L' ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( kd<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ldab<kd+1 ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZPBSTF', -info )
              return
           end if
           ! quick return if possible
           if( n==0 )return
           kld = max( 1_${ik}$, ldab-1 )
           ! set the splitting point m.
           m = ( n+kd ) / 2_${ik}$
           if( upper ) then
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( kd+1, j ),KIND=${ck}$)
                 if( ajj<=zero ) then
                    ab( kd+1, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib${ii}$_${ci}$dscal( km, one / ajj, ab( kd+1-km, j ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$her( 'UPPER', km, -one, ab( kd+1-km, j ), 1_${ik}$,ab( kd+1, j-km ), kld )
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( kd+1, j ),KIND=${ck}$)
                 if( ajj<=zero ) then
                    ab( kd+1, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( kd+1, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_${ci}$dscal( km, one / ajj, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_${ci}$lacgv( km, ab( kd, j+1 ), kld )
                    call stdlib${ii}$_${ci}$her( 'UPPER', km, -one, ab( kd, j+1 ), kld,ab( kd+1, j+1 ), kld )
                              
                    call stdlib${ii}$_${ci}$lacgv( km, ab( kd, j+1 ), kld )
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( 1_${ik}$, j ),KIND=${ck}$)
                 if( ajj<=zero ) then
                    ab( 1_${ik}$, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( j-1, kd )
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib${ii}$_${ci}$dscal( km, one / ajj, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_${ci}$lacgv( km, ab( km+1, j-km ), kld )
                 call stdlib${ii}$_${ci}$her( 'LOWER', km, -one, ab( km+1, j-km ), kld,ab( 1_${ik}$, j-km ), kld )
                           
                 call stdlib${ii}$_${ci}$lacgv( km, ab( km+1, j-km ), kld )
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real( ab( 1_${ik}$, j ),KIND=${ck}$)
                 if( ajj<=zero ) then
                    ab( 1_${ik}$, j ) = ajj
                    go to 50
                 end if
                 ajj = sqrt( ajj )
                 ab( 1_${ik}$, j ) = ajj
                 km = min( kd, m-j )
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if( km>0_${ik}$ ) then
                    call stdlib${ii}$_${ci}$dscal( km, one / ajj, ab( 2_${ik}$, j ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$her( 'LOWER', km, -one, ab( 2_${ik}$, j ), 1_${ik}$,ab( 1_${ik}$, j+1 ), kld )
                 end if
              end do
           end if
           return
           50 continue
           info = j
           return
     end subroutine stdlib${ii}$_${ci}$pbstf

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slag2( a, lda, b, ldb, safmin, scale1, scale2, wr1,wr2, wi )
     !! SLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
     !! problem  A - w B, with scaling as necessary to avoid over-/underflow.
     !! The scaling factor "s" results in a modified eigenvalue equation
     !! s A - w B
     !! where  s  is a non-negative scaling factor chosen so that  w,  w B,
     !! and  s A  do not overflow and, if possible, do not underflow, either.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, ldb
           real(sp), intent(in) :: safmin
           real(sp), intent(out) :: scale1, scale2, wi, wr1, wr2
           ! Array Arguments 
           real(sp), intent(in) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           ! Parameters 
           real(sp), parameter :: fuzzy1 = one+1.0e-5_sp
           
           
           
           ! Local Scalars 
           real(sp) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, &
           binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r,&
            rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, &
                      wsmall
           ! Intrinsic Functions 
           ! Executable Statements 
           rtmin = sqrt( safmin )
           rtmax = one / rtmin
           safmax = one / safmin
           ! scale a
           anorm = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 1_${ik}$ ) ),abs( a( 1_${ik}$, 2_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ), &
                     safmin )
           ascale = one / anorm
           a11 = ascale*a( 1_${ik}$, 1_${ik}$ )
           a21 = ascale*a( 2_${ik}$, 1_${ik}$ )
           a12 = ascale*a( 1_${ik}$, 2_${ik}$ )
           a22 = ascale*a( 2_${ik}$, 2_${ik}$ )
           ! perturb b if necessary to insure non-singularity
           b11 = b( 1_${ik}$, 1_${ik}$ )
           b12 = b( 1_${ik}$, 2_${ik}$ )
           b22 = b( 2_${ik}$, 2_${ik}$ )
           bmin = rtmin*max( abs( b11 ), abs( b12 ), abs( b22 ), rtmin )
           if( abs( b11 )<bmin )b11 = sign( bmin, b11 )
           if( abs( b22 )<bmin )b22 = sign( bmin, b22 )
           ! scale b
           bnorm = max( abs( b11 ), abs( b12 )+abs( b22 ), safmin )
           bsize = max( abs( b11 ), abs( b22 ) )
           bscale = one / bsize
           b11 = b11*bscale
           b12 = b12*bscale
           b22 = b22*bscale
           ! compute larger eigenvalue by method described by c. van loan
           ! ( as is a shifted by -shift*b )
           binv11 = one / b11
           binv22 = one / b22
           s1 = a11*binv11
           s2 = a22*binv22
           if( abs( s1 )<=abs( s2 ) ) then
              as12 = a12 - s1*b12
              as22 = a22 - s1*b22
              ss = a21*( binv11*binv22 )
              abi22 = as22*binv22 - ss*b12
              pp = half*abi22
              shift = s1
           else
              as12 = a12 - s2*b12
              as11 = a11 - s2*b11
              ss = a21*( binv11*binv22 )
              abi22 = -ss*b12
              pp = half*( as11*binv11+abi22 )
              shift = s2
           end if
           qq = ss*as12
           if( abs( pp*rtmin )>=one ) then
              discr = ( rtmin*pp )**2_${ik}$ + qq*safmin
              r = sqrt( abs( discr ) )*rtmax
           else
              if( pp**2_${ik}$+abs( qq )<=safmin ) then
                 discr = ( rtmax*pp )**2_${ik}$ + qq*safmax
                 r = sqrt( abs( discr ) )*rtmin
              else
                 discr = pp**2_${ik}$ + qq
                 r = sqrt( abs( discr ) )
              end if
           end if
           ! note: the test of r in the following if is to cover the case when
                 ! discr is small and negative and is flushed to zero during
                 ! the calculation of r.  on machines which have a consistent
                 ! flush-to-zero threshold and handle numbers above that
                 ! threshold correctly, it would not be necessary.
           if( discr>=zero .or. r==zero ) then
              sum = pp + sign( r, pp )
              diff = pp - sign( r, pp )
              wbig = shift + sum
              ! compute smaller eigenvalue
              wsmall = shift + diff
              if( half*abs( wbig )>max( abs( wsmall ), safmin ) ) then
                 wdet = ( a11*a22-a12*a21 )*( binv11*binv22 )
                 wsmall = wdet / wbig
              end if
              ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1)
              ! for wr1.
              if( pp>abi22 ) then
                 wr1 = min( wbig, wsmall )
                 wr2 = max( wbig, wsmall )
              else
                 wr1 = max( wbig, wsmall )
                 wr2 = min( wbig, wsmall )
              end if
              wi = zero
           else
              ! complex eigenvalues
              wr1 = shift + pp
              wr2 = wr1
              wi = r
           end if
           ! further scaling to avoid underflow and overflow in computing
           ! scale1 and overflow in computing w*b.
           ! this scale factor (wscale) is bounded from above using c1 and c2,
           ! and from below using c3 and c4.
              ! c1 implements the condition  s a  must never overflow.
              ! c2 implements the condition  w b  must never overflow.
              ! c3, with c2,
                 ! implement the condition that s a - w b must never overflow.
              ! c4 implements the condition  s    should not underflow.
              ! c5 implements the condition  max(s,|w|) should be at least 2.
           c1 = bsize*( safmin*max( one, ascale ) )
           c2 = safmin*max( one, bnorm )
           c3 = bsize*safmin
           if( ascale<=one .and. bsize<=one ) then
              c4 = min( one, ( ascale / safmin )*bsize )
           else
              c4 = one
           end if
           if( ascale<=one .or. bsize<=one ) then
              c5 = min( one, ascale*bsize )
           else
              c5 = one
           end if
           ! scale first eigenvalue
           wabs = abs( wr1 ) + abs( wi )
           wsize = max( safmin, c1, fuzzy1*( wabs*c2+c3 ),min( c4, half*max( wabs, c5 ) ) )
                     
           if( wsize/=one ) then
              wscale = one / wsize
              if( wsize>one ) then
                 scale1 = ( max( ascale, bsize )*wscale )*min( ascale, bsize )
              else
                 scale1 = ( min( ascale, bsize )*wscale )*max( ascale, bsize )
              end if
              wr1 = wr1*wscale
              if( wi/=zero ) then
                 wi = wi*wscale
                 wr2 = wr1
                 scale2 = scale1
              end if
           else
              scale1 = ascale*bsize
              scale2 = scale1
           end if
           ! scale second eigenvalue (if real)
           if( wi==zero ) then
              wsize = max( safmin, c1, fuzzy1*( abs( wr2 )*c2+c3 ),min( c4, half*max( abs( wr2 ), &
                        c5 ) ) )
              if( wsize/=one ) then
                 wscale = one / wsize
                 if( wsize>one ) then
                    scale2 = ( max( ascale, bsize )*wscale )*min( ascale, bsize )
                 else
                    scale2 = ( min( ascale, bsize )*wscale )*max( ascale, bsize )
                 end if
                 wr2 = wr2*wscale
              else
                 scale2 = ascale*bsize
              end if
           end if
           return
     end subroutine stdlib${ii}$_slag2

     pure module subroutine stdlib${ii}$_dlag2( a, lda, b, ldb, safmin, scale1, scale2, wr1,wr2, wi )
     !! DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
     !! problem  A - w B, with scaling as necessary to avoid over-/underflow.
     !! The scaling factor "s" results in a modified eigenvalue equation
     !! s A - w B
     !! where  s  is a non-negative scaling factor chosen so that  w,  w B,
     !! and  s A  do not overflow and, if possible, do not underflow, either.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, ldb
           real(dp), intent(in) :: safmin
           real(dp), intent(out) :: scale1, scale2, wi, wr1, wr2
           ! Array Arguments 
           real(dp), intent(in) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           ! Parameters 
           real(dp), parameter :: fuzzy1 = one+1.0e-5_dp
           
           
           
           ! Local Scalars 
           real(dp) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, &
           binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r,&
            rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, &
                      wsmall
           ! Intrinsic Functions 
           ! Executable Statements 
           rtmin = sqrt( safmin )
           rtmax = one / rtmin
           safmax = one / safmin
           ! scale a
           anorm = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 1_${ik}$ ) ),abs( a( 1_${ik}$, 2_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ), &
                     safmin )
           ascale = one / anorm
           a11 = ascale*a( 1_${ik}$, 1_${ik}$ )
           a21 = ascale*a( 2_${ik}$, 1_${ik}$ )
           a12 = ascale*a( 1_${ik}$, 2_${ik}$ )
           a22 = ascale*a( 2_${ik}$, 2_${ik}$ )
           ! perturb b if necessary to insure non-singularity
           b11 = b( 1_${ik}$, 1_${ik}$ )
           b12 = b( 1_${ik}$, 2_${ik}$ )
           b22 = b( 2_${ik}$, 2_${ik}$ )
           bmin = rtmin*max( abs( b11 ), abs( b12 ), abs( b22 ), rtmin )
           if( abs( b11 )<bmin )b11 = sign( bmin, b11 )
           if( abs( b22 )<bmin )b22 = sign( bmin, b22 )
           ! scale b
           bnorm = max( abs( b11 ), abs( b12 )+abs( b22 ), safmin )
           bsize = max( abs( b11 ), abs( b22 ) )
           bscale = one / bsize
           b11 = b11*bscale
           b12 = b12*bscale
           b22 = b22*bscale
           ! compute larger eigenvalue by method described by c. van loan
           ! ( as is a shifted by -shift*b )
           binv11 = one / b11
           binv22 = one / b22
           s1 = a11*binv11
           s2 = a22*binv22
           if( abs( s1 )<=abs( s2 ) ) then
              as12 = a12 - s1*b12
              as22 = a22 - s1*b22
              ss = a21*( binv11*binv22 )
              abi22 = as22*binv22 - ss*b12
              pp = half*abi22
              shift = s1
           else
              as12 = a12 - s2*b12
              as11 = a11 - s2*b11
              ss = a21*( binv11*binv22 )
              abi22 = -ss*b12
              pp = half*( as11*binv11+abi22 )
              shift = s2
           end if
           qq = ss*as12
           if( abs( pp*rtmin )>=one ) then
              discr = ( rtmin*pp )**2_${ik}$ + qq*safmin
              r = sqrt( abs( discr ) )*rtmax
           else
              if( pp**2_${ik}$+abs( qq )<=safmin ) then
                 discr = ( rtmax*pp )**2_${ik}$ + qq*safmax
                 r = sqrt( abs( discr ) )*rtmin
              else
                 discr = pp**2_${ik}$ + qq
                 r = sqrt( abs( discr ) )
              end if
           end if
           ! note: the test of r in the following if is to cover the case when
                 ! discr is small and negative and is flushed to zero during
                 ! the calculation of r.  on machines which have a consistent
                 ! flush-to-zero threshold and handle numbers above that
                 ! threshold correctly, it would not be necessary.
           if( discr>=zero .or. r==zero ) then
              sum = pp + sign( r, pp )
              diff = pp - sign( r, pp )
              wbig = shift + sum
              ! compute smaller eigenvalue
              wsmall = shift + diff
              if( half*abs( wbig )>max( abs( wsmall ), safmin ) ) then
                 wdet = ( a11*a22-a12*a21 )*( binv11*binv22 )
                 wsmall = wdet / wbig
              end if
              ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1)
              ! for wr1.
              if( pp>abi22 ) then
                 wr1 = min( wbig, wsmall )
                 wr2 = max( wbig, wsmall )
              else
                 wr1 = max( wbig, wsmall )
                 wr2 = min( wbig, wsmall )
              end if
              wi = zero
           else
              ! complex eigenvalues
              wr1 = shift + pp
              wr2 = wr1
              wi = r
           end if
           ! further scaling to avoid underflow and overflow in computing
           ! scale1 and overflow in computing w*b.
           ! this scale factor (wscale) is bounded from above using c1 and c2,
           ! and from below using c3 and c4.
              ! c1 implements the condition  s a  must never overflow.
              ! c2 implements the condition  w b  must never overflow.
              ! c3, with c2,
                 ! implement the condition that s a - w b must never overflow.
              ! c4 implements the condition  s    should not underflow.
              ! c5 implements the condition  max(s,|w|) should be at least 2.
           c1 = bsize*( safmin*max( one, ascale ) )
           c2 = safmin*max( one, bnorm )
           c3 = bsize*safmin
           if( ascale<=one .and. bsize<=one ) then
              c4 = min( one, ( ascale / safmin )*bsize )
           else
              c4 = one
           end if
           if( ascale<=one .or. bsize<=one ) then
              c5 = min( one, ascale*bsize )
           else
              c5 = one
           end if
           ! scale first eigenvalue
           wabs = abs( wr1 ) + abs( wi )
           wsize = max( safmin, c1, fuzzy1*( wabs*c2+c3 ),min( c4, half*max( wabs, c5 ) ) )
                     
           if( wsize/=one ) then
              wscale = one / wsize
              if( wsize>one ) then
                 scale1 = ( max( ascale, bsize )*wscale )*min( ascale, bsize )
              else
                 scale1 = ( min( ascale, bsize )*wscale )*max( ascale, bsize )
              end if
              wr1 = wr1*wscale
              if( wi/=zero ) then
                 wi = wi*wscale
                 wr2 = wr1
                 scale2 = scale1
              end if
           else
              scale1 = ascale*bsize
              scale2 = scale1
           end if
           ! scale second eigenvalue (if real)
           if( wi==zero ) then
              wsize = max( safmin, c1, fuzzy1*( abs( wr2 )*c2+c3 ),min( c4, half*max( abs( wr2 ), &
                        c5 ) ) )
              if( wsize/=one ) then
                 wscale = one / wsize
                 if( wsize>one ) then
                    scale2 = ( max( ascale, bsize )*wscale )*min( ascale, bsize )
                 else
                    scale2 = ( min( ascale, bsize )*wscale )*max( ascale, bsize )
                 end if
                 wr2 = wr2*wscale
              else
                 scale2 = ascale*bsize
              end if
           end if
           return
     end subroutine stdlib${ii}$_dlag2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lag2( a, lda, b, ldb, safmin, scale1, scale2, wr1,wr2, wi )
     !! DLAG2: computes the eigenvalues of a 2 x 2 generalized eigenvalue
     !! problem  A - w B, with scaling as necessary to avoid over-/underflow.
     !! The scaling factor "s" results in a modified eigenvalue equation
     !! s A - w B
     !! where  s  is a non-negative scaling factor chosen so that  w,  w B,
     !! and  s A  do not overflow and, if possible, do not underflow, either.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, ldb
           real(${rk}$), intent(in) :: safmin
           real(${rk}$), intent(out) :: scale1, scale2, wi, wr1, wr2
           ! Array Arguments 
           real(${rk}$), intent(in) :: a(lda,*), b(ldb,*)
        ! =====================================================================
           ! Parameters 
           real(${rk}$), parameter :: fuzzy1 = one+1.0e-5_${rk}$
           
           
           
           ! Local Scalars 
           real(${rk}$) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, &
           binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r,&
            rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, &
                      wsmall
           ! Intrinsic Functions 
           ! Executable Statements 
           rtmin = sqrt( safmin )
           rtmax = one / rtmin
           safmax = one / safmin
           ! scale a
           anorm = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 1_${ik}$ ) ),abs( a( 1_${ik}$, 2_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ), &
                     safmin )
           ascale = one / anorm
           a11 = ascale*a( 1_${ik}$, 1_${ik}$ )
           a21 = ascale*a( 2_${ik}$, 1_${ik}$ )
           a12 = ascale*a( 1_${ik}$, 2_${ik}$ )
           a22 = ascale*a( 2_${ik}$, 2_${ik}$ )
           ! perturb b if necessary to insure non-singularity
           b11 = b( 1_${ik}$, 1_${ik}$ )
           b12 = b( 1_${ik}$, 2_${ik}$ )
           b22 = b( 2_${ik}$, 2_${ik}$ )
           bmin = rtmin*max( abs( b11 ), abs( b12 ), abs( b22 ), rtmin )
           if( abs( b11 )<bmin )b11 = sign( bmin, b11 )
           if( abs( b22 )<bmin )b22 = sign( bmin, b22 )
           ! scale b
           bnorm = max( abs( b11 ), abs( b12 )+abs( b22 ), safmin )
           bsize = max( abs( b11 ), abs( b22 ) )
           bscale = one / bsize
           b11 = b11*bscale
           b12 = b12*bscale
           b22 = b22*bscale
           ! compute larger eigenvalue by method described by c. van loan
           ! ( as is a shifted by -shift*b )
           binv11 = one / b11
           binv22 = one / b22
           s1 = a11*binv11
           s2 = a22*binv22
           if( abs( s1 )<=abs( s2 ) ) then
              as12 = a12 - s1*b12
              as22 = a22 - s1*b22
              ss = a21*( binv11*binv22 )
              abi22 = as22*binv22 - ss*b12
              pp = half*abi22
              shift = s1
           else
              as12 = a12 - s2*b12
              as11 = a11 - s2*b11
              ss = a21*( binv11*binv22 )
              abi22 = -ss*b12
              pp = half*( as11*binv11+abi22 )
              shift = s2
           end if
           qq = ss*as12
           if( abs( pp*rtmin )>=one ) then
              discr = ( rtmin*pp )**2_${ik}$ + qq*safmin
              r = sqrt( abs( discr ) )*rtmax
           else
              if( pp**2_${ik}$+abs( qq )<=safmin ) then
                 discr = ( rtmax*pp )**2_${ik}$ + qq*safmax
                 r = sqrt( abs( discr ) )*rtmin
              else
                 discr = pp**2_${ik}$ + qq
                 r = sqrt( abs( discr ) )
              end if
           end if
           ! note: the test of r in the following if is to cover the case when
                 ! discr is small and negative and is flushed to zero during
                 ! the calculation of r.  on machines which have a consistent
                 ! flush-to-zero threshold and handle numbers above that
                 ! threshold correctly, it would not be necessary.
           if( discr>=zero .or. r==zero ) then
              sum = pp + sign( r, pp )
              diff = pp - sign( r, pp )
              wbig = shift + sum
              ! compute smaller eigenvalue
              wsmall = shift + diff
              if( half*abs( wbig )>max( abs( wsmall ), safmin ) ) then
                 wdet = ( a11*a22-a12*a21 )*( binv11*binv22 )
                 wsmall = wdet / wbig
              end if
              ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1)
              ! for wr1.
              if( pp>abi22 ) then
                 wr1 = min( wbig, wsmall )
                 wr2 = max( wbig, wsmall )
              else
                 wr1 = max( wbig, wsmall )
                 wr2 = min( wbig, wsmall )
              end if
              wi = zero
           else
              ! complex eigenvalues
              wr1 = shift + pp
              wr2 = wr1
              wi = r
           end if
           ! further scaling to avoid underflow and overflow in computing
           ! scale1 and overflow in computing w*b.
           ! this scale factor (wscale) is bounded from above using c1 and c2,
           ! and from below using c3 and c4.
              ! c1 implements the condition  s a  must never overflow.
              ! c2 implements the condition  w b  must never overflow.
              ! c3, with c2,
                 ! implement the condition that s a - w b must never overflow.
              ! c4 implements the condition  s    should not underflow.
              ! c5 implements the condition  max(s,|w|) should be at least 2.
           c1 = bsize*( safmin*max( one, ascale ) )
           c2 = safmin*max( one, bnorm )
           c3 = bsize*safmin
           if( ascale<=one .and. bsize<=one ) then
              c4 = min( one, ( ascale / safmin )*bsize )
           else
              c4 = one
           end if
           if( ascale<=one .or. bsize<=one ) then
              c5 = min( one, ascale*bsize )
           else
              c5 = one
           end if
           ! scale first eigenvalue
           wabs = abs( wr1 ) + abs( wi )
           wsize = max( safmin, c1, fuzzy1*( wabs*c2+c3 ),min( c4, half*max( wabs, c5 ) ) )
                     
           if( wsize/=one ) then
              wscale = one / wsize
              if( wsize>one ) then
                 scale1 = ( max( ascale, bsize )*wscale )*min( ascale, bsize )
              else
                 scale1 = ( min( ascale, bsize )*wscale )*max( ascale, bsize )
              end if
              wr1 = wr1*wscale
              if( wi/=zero ) then
                 wi = wi*wscale
                 wr2 = wr1
                 scale2 = scale1
              end if
           else
              scale1 = ascale*bsize
              scale2 = scale1
           end if
           ! scale second eigenvalue (if real)
           if( wi==zero ) then
              wsize = max( safmin, c1, fuzzy1*( abs( wr2 )*c2+c3 ),min( c4, half*max( abs( wr2 ), &
                        c5 ) ) )
              if( wsize/=one ) then
                 wscale = one / wsize
                 if( wsize>one ) then
                    scale2 = ( max( ascale, bsize )*wscale )*min( ascale, bsize )
                 else
                    scale2 = ( min( ascale, bsize )*wscale )*max( ascale, bsize )
                 end if
                 wr2 = wr2*wscale
              else
                 scale2 = ascale*bsize
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ri}$lag2

#:endif
#:endfor

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_dlag2${ri}$( m, n, sa, ldsa, a, lda, info )
     !! DLAG2Q converts a DOUBLE PRECISION matrix, SA, to an EXTENDED
     !! PRECISION matrix, A.
     !! Note that while it is possible to overflow while converting
     !! from double to single, it is not possible to overflow when
     !! converting from single to double.
     !! This is an auxiliary routine so there is no argument checking.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldsa, m, n
           ! Array Arguments 
           real(dp), intent(in) :: sa(ldsa,*)
           real(${rk}$), intent(out) :: a(lda,*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, j
           ! Executable Statements 
           info = 0_${ik}$
           do j = 1, n
              do i = 1, m
                 a( i, j ) = sa( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_dlag2${ri}$

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info )
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(in) :: m, n, n1, n2, ldq, ldc, lwork
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           real(sp), intent(in) :: q(ldq,*)
           real(sp), intent(inout) :: c(ldc,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, ldwork, len, lwkopt, nb, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q;
           ! nw is the minimum dimension of work.
           if( left ) then
              nq = m
           else
              nq = n
           end if
           nw = nq
           if( n1==0_${ik}$ .or. n2==0_${ik}$ ) nw = 1_${ik}$
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )&
                     then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n1<0_${ik}$ .or. n1+n2/=nq ) then
              info = -5_${ik}$
           else if( n2<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldq<max( 1_${ik}$, nq ) ) then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              lwkopt = m*n
              work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORM22', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ! degenerate cases (n1 = 0 or n2 = 0) are handled using stdlib${ii}$_strmm.
           if( n1==0_${ik}$ ) then
              call stdlib${ii}$_strmm( side, 'UPPER', trans, 'NON-UNIT', m, n, one,q, ldq, c, ldc )
                        
              work( 1_${ik}$ ) = one
              return
           else if( n2==0_${ik}$ ) then
              call stdlib${ii}$_strmm( side, 'LOWER', trans, 'NON-UNIT', m, n, one,q, ldq, c, ldc )
                        
              work( 1_${ik}$ ) = one
              return
           end if
           ! compute the largest chunk size available from the workspace.
           nb = max( 1_${ik}$, min( lwork, lwkopt ) / nq )
           if( left ) then
              if( notran ) then
                 do i = 1, n, nb
                    len = min( nb, n-i+1 )
                    ldwork = m
                    ! multiply bottom part of c by q12.
                    call stdlib${ii}$_slacpy( 'ALL', n1, len, c( n2+1, i ), ldc, work,ldwork )
                    call stdlib${ii}$_strmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'NON-UNIT',n1, len, one, &
                              q( 1_${ik}$, n2+1 ), ldq, work,ldwork )
                    ! multiply top part of c by q11.
                    call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n1, len, n2,one, q, ldq, c(&
                               1_${ik}$, i ), ldc, one, work,ldwork )
                    ! multiply top part of c by q21.
                    call stdlib${ii}$_slacpy( 'ALL', n2, len, c( 1_${ik}$, i ), ldc,work( n1+1 ), ldwork )
                              
                    call stdlib${ii}$_strmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',n2, len, one, &
                              q( n1+1, 1_${ik}$ ), ldq,work( n1+1 ), ldwork )
                    ! multiply bottom part of c by q22.
                    call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n2, len, n1,one, q( n1+1, &
                              n2+1 ), ldq, c( n2+1, i ), ldc,one, work( n1+1 ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_slacpy( 'ALL', m, len, work, ldwork, c( 1_${ik}$, i ),ldc )
                 end do
              else
                 do i = 1, n, nb
                    len = min( nb, n-i+1 )
                    ldwork = m
                    ! multiply bottom part of c by q21**t.
                    call stdlib${ii}$_slacpy( 'ALL', n2, len, c( n1+1, i ), ldc, work,ldwork )
                    call stdlib${ii}$_strmm( 'LEFT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',n2, len, one, q( &
                              n1+1, 1_${ik}$ ), ldq, work,ldwork )
                    ! multiply top part of c by q11**t.
                    call stdlib${ii}$_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', n2, len, n1,one, q, ldq, c( 1_${ik}$,&
                               i ), ldc, one, work,ldwork )
                    ! multiply top part of c by q12**t.
                    call stdlib${ii}$_slacpy( 'ALL', n1, len, c( 1_${ik}$, i ), ldc,work( n2+1 ), ldwork )
                              
                    call stdlib${ii}$_strmm( 'LEFT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',n1, len, one, q( &
                              1_${ik}$, n2+1 ), ldq,work( n2+1 ), ldwork )
                    ! multiply bottom part of c by q22**t.
                    call stdlib${ii}$_sgemm( 'TRANSPOSE', 'NO TRANSPOSE', n1, len, n2,one, q( n1+1, n2+&
                              1_${ik}$ ), ldq, c( n1+1, i ), ldc,one, work( n2+1 ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_slacpy( 'ALL', m, len, work, ldwork, c( 1_${ik}$, i ),ldc )
                 end do
              end if
           else
              if( notran ) then
                 do i = 1, m, nb
                    len = min( nb, m-i+1 )
                    ldwork = len
                    ! multiply right part of c by q21.
                    call stdlib${ii}$_slacpy( 'ALL', len, n2, c( i, n1+1 ), ldc, work,ldwork )
                    call stdlib${ii}$_strmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',len, n2, one, &
                              q( n1+1, 1_${ik}$ ), ldq, work,ldwork )
                    ! multiply left part of c by q11.
                    call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', len, n2, n1,one, c( i, 1_${ik}$ ),&
                               ldc, q, ldq, one, work,ldwork )
                    ! multiply left part of c by q12.
                    call stdlib${ii}$_slacpy( 'ALL', len, n1, c( i, 1_${ik}$ ), ldc,work( 1_${ik}$ + n2*ldwork ), &
                              ldwork )
                    call stdlib${ii}$_strmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'NON-UNIT',len, n1, one, &
                              q( 1_${ik}$, n2+1 ), ldq,work( 1_${ik}$ + n2*ldwork ), ldwork )
                    ! multiply right part of c by q22.
                    call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', len, n1, n2,one, c( i, n1+&
                              1_${ik}$ ), ldc, q( n1+1, n2+1 ), ldq,one, work( 1_${ik}$ + n2*ldwork ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_slacpy( 'ALL', len, n, work, ldwork, c( i, 1_${ik}$ ),ldc )
                 end do
              else
                 do i = 1, m, nb
                    len = min( nb, m-i+1 )
                    ldwork = len
                    ! multiply right part of c by q12**t.
                    call stdlib${ii}$_slacpy( 'ALL', len, n1, c( i, n2+1 ), ldc, work,ldwork )
                    call stdlib${ii}$_strmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',len, n1, one, q( &
                              1_${ik}$, n2+1 ), ldq, work,ldwork )
                    ! multiply left part of c by q11**t.
                    call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', len, n1, n2,one, c( i, 1_${ik}$ ), &
                              ldc, q, ldq, one, work,ldwork )
                    ! multiply left part of c by q21**t.
                    call stdlib${ii}$_slacpy( 'ALL', len, n2, c( i, 1_${ik}$ ), ldc,work( 1_${ik}$ + n1*ldwork ), &
                              ldwork )
                    call stdlib${ii}$_strmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',len, n2, one, q( &
                              n1+1, 1_${ik}$ ), ldq,work( 1_${ik}$ + n1*ldwork ), ldwork )
                    ! multiply right part of c by q22**t.
                    call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', len, n2, n1,one, c( i, n2+1 ),&
                               ldc, q( n1+1, n2+1 ), ldq,one, work( 1_${ik}$ + n1*ldwork ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_slacpy( 'ALL', len, n, work, ldwork, c( i, 1_${ik}$ ),ldc )
                 end do
              end if
           end if
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sorm22

     pure module subroutine stdlib${ii}$_dorm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info )
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(in) :: m, n, n1, n2, ldq, ldc, lwork
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           real(dp), intent(in) :: q(ldq,*)
           real(dp), intent(inout) :: c(ldc,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, ldwork, len, lwkopt, nb, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q;
           ! nw is the minimum dimension of work.
           if( left ) then
              nq = m
           else
              nq = n
           end if
           nw = nq
           if( n1==0_${ik}$ .or. n2==0_${ik}$ ) nw = 1_${ik}$
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )&
                     then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n1<0_${ik}$ .or. n1+n2/=nq ) then
              info = -5_${ik}$
           else if( n2<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldq<max( 1_${ik}$, nq ) ) then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              lwkopt = m*n
              work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORM22', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ! degenerate cases (n1 = 0 or n2 = 0) are handled using stdlib${ii}$_dtrmm.
           if( n1==0_${ik}$ ) then
              call stdlib${ii}$_dtrmm( side, 'UPPER', trans, 'NON-UNIT', m, n, one,q, ldq, c, ldc )
                        
              work( 1_${ik}$ ) = one
              return
           else if( n2==0_${ik}$ ) then
              call stdlib${ii}$_dtrmm( side, 'LOWER', trans, 'NON-UNIT', m, n, one,q, ldq, c, ldc )
                        
              work( 1_${ik}$ ) = one
              return
           end if
           ! compute the largest chunk size available from the workspace.
           nb = max( 1_${ik}$, min( lwork, lwkopt ) / nq )
           if( left ) then
              if( notran ) then
                 do i = 1, n, nb
                    len = min( nb, n-i+1 )
                    ldwork = m
                    ! multiply bottom part of c by q12.
                    call stdlib${ii}$_dlacpy( 'ALL', n1, len, c( n2+1, i ), ldc, work,ldwork )
                    call stdlib${ii}$_dtrmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'NON-UNIT',n1, len, one, &
                              q( 1_${ik}$, n2+1 ), ldq, work,ldwork )
                    ! multiply top part of c by q11.
                    call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n1, len, n2,one, q, ldq, c(&
                               1_${ik}$, i ), ldc, one, work,ldwork )
                    ! multiply top part of c by q21.
                    call stdlib${ii}$_dlacpy( 'ALL', n2, len, c( 1_${ik}$, i ), ldc,work( n1+1 ), ldwork )
                              
                    call stdlib${ii}$_dtrmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',n2, len, one, &
                              q( n1+1, 1_${ik}$ ), ldq,work( n1+1 ), ldwork )
                    ! multiply bottom part of c by q22.
                    call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n2, len, n1,one, q( n1+1, &
                              n2+1 ), ldq, c( n2+1, i ), ldc,one, work( n1+1 ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_dlacpy( 'ALL', m, len, work, ldwork, c( 1_${ik}$, i ),ldc )
                 end do
              else
                 do i = 1, n, nb
                    len = min( nb, n-i+1 )
                    ldwork = m
                    ! multiply bottom part of c by q21**t.
                    call stdlib${ii}$_dlacpy( 'ALL', n2, len, c( n1+1, i ), ldc, work,ldwork )
                    call stdlib${ii}$_dtrmm( 'LEFT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',n2, len, one, q( &
                              n1+1, 1_${ik}$ ), ldq, work,ldwork )
                    ! multiply top part of c by q11**t.
                    call stdlib${ii}$_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', n2, len, n1,one, q, ldq, c( 1_${ik}$,&
                               i ), ldc, one, work,ldwork )
                    ! multiply top part of c by q12**t.
                    call stdlib${ii}$_dlacpy( 'ALL', n1, len, c( 1_${ik}$, i ), ldc,work( n2+1 ), ldwork )
                              
                    call stdlib${ii}$_dtrmm( 'LEFT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',n1, len, one, q( &
                              1_${ik}$, n2+1 ), ldq,work( n2+1 ), ldwork )
                    ! multiply bottom part of c by q22**t.
                    call stdlib${ii}$_dgemm( 'TRANSPOSE', 'NO TRANSPOSE', n1, len, n2,one, q( n1+1, n2+&
                              1_${ik}$ ), ldq, c( n1+1, i ), ldc,one, work( n2+1 ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_dlacpy( 'ALL', m, len, work, ldwork, c( 1_${ik}$, i ),ldc )
                 end do
              end if
           else
              if( notran ) then
                 do i = 1, m, nb
                    len = min( nb, m-i+1 )
                    ldwork = len
                    ! multiply right part of c by q21.
                    call stdlib${ii}$_dlacpy( 'ALL', len, n2, c( i, n1+1 ), ldc, work,ldwork )
                    call stdlib${ii}$_dtrmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',len, n2, one, &
                              q( n1+1, 1_${ik}$ ), ldq, work,ldwork )
                    ! multiply left part of c by q11.
                    call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', len, n2, n1,one, c( i, 1_${ik}$ ),&
                               ldc, q, ldq, one, work,ldwork )
                    ! multiply left part of c by q12.
                    call stdlib${ii}$_dlacpy( 'ALL', len, n1, c( i, 1_${ik}$ ), ldc,work( 1_${ik}$ + n2*ldwork ), &
                              ldwork )
                    call stdlib${ii}$_dtrmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'NON-UNIT',len, n1, one, &
                              q( 1_${ik}$, n2+1 ), ldq,work( 1_${ik}$ + n2*ldwork ), ldwork )
                    ! multiply right part of c by q22.
                    call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', len, n1, n2,one, c( i, n1+&
                              1_${ik}$ ), ldc, q( n1+1, n2+1 ), ldq,one, work( 1_${ik}$ + n2*ldwork ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_dlacpy( 'ALL', len, n, work, ldwork, c( i, 1_${ik}$ ),ldc )
                 end do
              else
                 do i = 1, m, nb
                    len = min( nb, m-i+1 )
                    ldwork = len
                    ! multiply right part of c by q12**t.
                    call stdlib${ii}$_dlacpy( 'ALL', len, n1, c( i, n2+1 ), ldc, work,ldwork )
                    call stdlib${ii}$_dtrmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',len, n1, one, q( &
                              1_${ik}$, n2+1 ), ldq, work,ldwork )
                    ! multiply left part of c by q11**t.
                    call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', len, n1, n2,one, c( i, 1_${ik}$ ), &
                              ldc, q, ldq, one, work,ldwork )
                    ! multiply left part of c by q21**t.
                    call stdlib${ii}$_dlacpy( 'ALL', len, n2, c( i, 1_${ik}$ ), ldc,work( 1_${ik}$ + n1*ldwork ), &
                              ldwork )
                    call stdlib${ii}$_dtrmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',len, n2, one, q( &
                              n1+1, 1_${ik}$ ), ldq,work( 1_${ik}$ + n1*ldwork ), ldwork )
                    ! multiply right part of c by q22**t.
                    call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', len, n2, n1,one, c( i, n2+1 ),&
                               ldc, q( n1+1, n2+1 ), ldq,one, work( 1_${ik}$ + n1*ldwork ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_dlacpy( 'ALL', len, n, work, ldwork, c( i, 1_${ik}$ ),ldc )
                 end do
              end if
           end if
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dorm22

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orm22( side, trans, m, n, n1, n2, q, ldq, c, ldc,work, lwork, info )
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(in) :: m, n, n1, n2, ldq, ldc, lwork
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           real(${rk}$), intent(in) :: q(ldq,*)
           real(${rk}$), intent(inout) :: c(ldc,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, ldwork, len, lwkopt, nb, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q;
           ! nw is the minimum dimension of work.
           if( left ) then
              nq = m
           else
              nq = n
           end if
           nw = nq
           if( n1==0_${ik}$ .or. n2==0_${ik}$ ) nw = 1_${ik}$
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( trans, 'N' ) .and. .not.stdlib_lsame( trans, 'T' ) )&
                     then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n1<0_${ik}$ .or. n1+n2/=nq ) then
              info = -5_${ik}$
           else if( n2<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldq<max( 1_${ik}$, nq ) ) then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              lwkopt = m*n
              work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORM22', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ! degenerate cases (n1 = 0 or n2 = 0) are handled using stdlib${ii}$_${ri}$trmm.
           if( n1==0_${ik}$ ) then
              call stdlib${ii}$_${ri}$trmm( side, 'UPPER', trans, 'NON-UNIT', m, n, one,q, ldq, c, ldc )
                        
              work( 1_${ik}$ ) = one
              return
           else if( n2==0_${ik}$ ) then
              call stdlib${ii}$_${ri}$trmm( side, 'LOWER', trans, 'NON-UNIT', m, n, one,q, ldq, c, ldc )
                        
              work( 1_${ik}$ ) = one
              return
           end if
           ! compute the largest chunk size available from the workspace.
           nb = max( 1_${ik}$, min( lwork, lwkopt ) / nq )
           if( left ) then
              if( notran ) then
                 do i = 1, n, nb
                    len = min( nb, n-i+1 )
                    ldwork = m
                    ! multiply bottom part of c by q12.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', n1, len, c( n2+1, i ), ldc, work,ldwork )
                    call stdlib${ii}$_${ri}$trmm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'NON-UNIT',n1, len, one, &
                              q( 1_${ik}$, n2+1 ), ldq, work,ldwork )
                    ! multiply top part of c by q11.
                    call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n1, len, n2,one, q, ldq, c(&
                               1_${ik}$, i ), ldc, one, work,ldwork )
                    ! multiply top part of c by q21.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', n2, len, c( 1_${ik}$, i ), ldc,work( n1+1 ), ldwork )
                              
                    call stdlib${ii}$_${ri}$trmm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',n2, len, one, &
                              q( n1+1, 1_${ik}$ ), ldq,work( n1+1 ), ldwork )
                    ! multiply bottom part of c by q22.
                    call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', n2, len, n1,one, q( n1+1, &
                              n2+1 ), ldq, c( n2+1, i ), ldc,one, work( n1+1 ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', m, len, work, ldwork, c( 1_${ik}$, i ),ldc )
                 end do
              else
                 do i = 1, n, nb
                    len = min( nb, n-i+1 )
                    ldwork = m
                    ! multiply bottom part of c by q21**t.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', n2, len, c( n1+1, i ), ldc, work,ldwork )
                    call stdlib${ii}$_${ri}$trmm( 'LEFT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',n2, len, one, q( &
                              n1+1, 1_${ik}$ ), ldq, work,ldwork )
                    ! multiply top part of c by q11**t.
                    call stdlib${ii}$_${ri}$gemm( 'TRANSPOSE', 'NO TRANSPOSE', n2, len, n1,one, q, ldq, c( 1_${ik}$,&
                               i ), ldc, one, work,ldwork )
                    ! multiply top part of c by q12**t.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', n1, len, c( 1_${ik}$, i ), ldc,work( n2+1 ), ldwork )
                              
                    call stdlib${ii}$_${ri}$trmm( 'LEFT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',n1, len, one, q( &
                              1_${ik}$, n2+1 ), ldq,work( n2+1 ), ldwork )
                    ! multiply bottom part of c by q22**t.
                    call stdlib${ii}$_${ri}$gemm( 'TRANSPOSE', 'NO TRANSPOSE', n1, len, n2,one, q( n1+1, n2+&
                              1_${ik}$ ), ldq, c( n1+1, i ), ldc,one, work( n2+1 ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', m, len, work, ldwork, c( 1_${ik}$, i ),ldc )
                 end do
              end if
           else
              if( notran ) then
                 do i = 1, m, nb
                    len = min( nb, m-i+1 )
                    ldwork = len
                    ! multiply right part of c by q21.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', len, n2, c( i, n1+1 ), ldc, work,ldwork )
                    call stdlib${ii}$_${ri}$trmm( 'RIGHT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT',len, n2, one, &
                              q( n1+1, 1_${ik}$ ), ldq, work,ldwork )
                    ! multiply left part of c by q11.
                    call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', len, n2, n1,one, c( i, 1_${ik}$ ),&
                               ldc, q, ldq, one, work,ldwork )
                    ! multiply left part of c by q12.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', len, n1, c( i, 1_${ik}$ ), ldc,work( 1_${ik}$ + n2*ldwork ), &
                              ldwork )
                    call stdlib${ii}$_${ri}$trmm( 'RIGHT', 'LOWER', 'NO TRANSPOSE', 'NON-UNIT',len, n1, one, &
                              q( 1_${ik}$, n2+1 ), ldq,work( 1_${ik}$ + n2*ldwork ), ldwork )
                    ! multiply right part of c by q22.
                    call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', len, n1, n2,one, c( i, n1+&
                              1_${ik}$ ), ldc, q( n1+1, n2+1 ), ldq,one, work( 1_${ik}$ + n2*ldwork ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', len, n, work, ldwork, c( i, 1_${ik}$ ),ldc )
                 end do
              else
                 do i = 1, m, nb
                    len = min( nb, m-i+1 )
                    ldwork = len
                    ! multiply right part of c by q12**t.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', len, n1, c( i, n2+1 ), ldc, work,ldwork )
                    call stdlib${ii}$_${ri}$trmm( 'RIGHT', 'LOWER', 'TRANSPOSE', 'NON-UNIT',len, n1, one, q( &
                              1_${ik}$, n2+1 ), ldq, work,ldwork )
                    ! multiply left part of c by q11**t.
                    call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'TRANSPOSE', len, n1, n2,one, c( i, 1_${ik}$ ), &
                              ldc, q, ldq, one, work,ldwork )
                    ! multiply left part of c by q21**t.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', len, n2, c( i, 1_${ik}$ ), ldc,work( 1_${ik}$ + n1*ldwork ), &
                              ldwork )
                    call stdlib${ii}$_${ri}$trmm( 'RIGHT', 'UPPER', 'TRANSPOSE', 'NON-UNIT',len, n2, one, q( &
                              n1+1, 1_${ik}$ ), ldq,work( 1_${ik}$ + n1*ldwork ), ldwork )
                    ! multiply right part of c by q22**t.
                    call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'TRANSPOSE', len, n2, n1,one, c( i, n2+1 ),&
                               ldc, q( n1+1, n2+1 ), ldq,one, work( 1_${ik}$ + n1*ldwork ), ldwork )
                    ! copy everything back.
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', len, n, work, ldwork, c( i, 1_${ik}$ ),ldc )
                 end do
              end if
           end if
           work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$orm22

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sdisna( job, m, n, d, sep, info )
     !! SDISNA computes the reciprocal condition numbers for the eigenvectors
     !! of a real symmetric or complex Hermitian matrix or for the left or
     !! right singular vectors of a general m-by-n matrix. The reciprocal
     !! condition number is the 'gap' between the corresponding eigenvalue or
     !! singular value and the nearest other one.
     !! The bound on the error, measured by angle in radians, in the I-th
     !! computed vector is given by
     !! SLAMCH( 'E' ) * ( ANORM / SEP( I ) )
     !! where ANORM = 2-norm(A) = max( abs( D(j) ) ).  SEP(I) is not allowed
     !! to be smaller than SLAMCH( 'E' )*ANORM in order to limit the size of
     !! the error bound.
     !! SDISNA may also be used to compute error bounds for eigenvectors of
     !! the generalized symmetric definite eigenproblem.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: job
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: m, n
           ! Array Arguments 
           real(sp), intent(in) :: d(*)
           real(sp), intent(out) :: sep(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: decr, eigen, incr, left, right, sing
           integer(${ik}$) :: i, k
           real(sp) :: anorm, eps, newgap, oldgap, safmin, thresh
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           eigen = stdlib_lsame( job, 'E' )
           left = stdlib_lsame( job, 'L' )
           right = stdlib_lsame( job, 'R' )
           sing = left .or. right
           if( eigen ) then
              k = m
           else if( sing ) then
              k = min( m, n )
           end if
           if( .not.eigen .and. .not.sing ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ ) then
              info = -3_${ik}$
           else
              incr = .true.
              decr = .true.
              do i = 1, k - 1
                 if( incr )incr = incr .and. d( i )<=d( i+1 )
                 if( decr )decr = decr .and. d( i )>=d( i+1 )
              end do
              if( sing .and. k>0_${ik}$ ) then
                 if( incr )incr = incr .and. zero<=d( 1_${ik}$ )
                 if( decr )decr = decr .and. d( k )>=zero
              end if
              if( .not.( incr .or. decr ) )info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SDISNA', -info )
              return
           end if
           ! quick return if possible
           if( k==0 )return
           ! compute reciprocal condition numbers
           if( k==1_${ik}$ ) then
              sep( 1_${ik}$ ) = stdlib${ii}$_slamch( 'O' )
           else
              oldgap = abs( d( 2_${ik}$ )-d( 1_${ik}$ ) )
              sep( 1_${ik}$ ) = oldgap
              do i = 2, k - 1
                 newgap = abs( d( i+1 )-d( i ) )
                 sep( i ) = min( oldgap, newgap )
                 oldgap = newgap
              end do
              sep( k ) = oldgap
           end if
           if( sing ) then
              if( ( left .and. m>n ) .or. ( right .and. m<n ) ) then
                 if( incr )sep( 1_${ik}$ ) = min( sep( 1_${ik}$ ), d( 1_${ik}$ ) )
                 if( decr )sep( k ) = min( sep( k ), d( k ) )
              end if
           end if
           ! ensure that reciprocal condition numbers are not less than
           ! threshold, in order to limit the size of the error bound
           eps = stdlib${ii}$_slamch( 'E' )
           safmin = stdlib${ii}$_slamch( 'S' )
           anorm = max( abs( d( 1_${ik}$ ) ), abs( d( k ) ) )
           if( anorm==zero ) then
              thresh = eps
           else
              thresh = max( eps*anorm, safmin )
           end if
           do i = 1, k
              sep( i ) = max( sep( i ), thresh )
           end do
           return
     end subroutine stdlib${ii}$_sdisna

     pure module subroutine stdlib${ii}$_ddisna( job, m, n, d, sep, info )
     !! DDISNA computes the reciprocal condition numbers for the eigenvectors
     !! of a real symmetric or complex Hermitian matrix or for the left or
     !! right singular vectors of a general m-by-n matrix. The reciprocal
     !! condition number is the 'gap' between the corresponding eigenvalue or
     !! singular value and the nearest other one.
     !! The bound on the error, measured by angle in radians, in the I-th
     !! computed vector is given by
     !! DLAMCH( 'E' ) * ( ANORM / SEP( I ) )
     !! where ANORM = 2-norm(A) = max( abs( D(j) ) ).  SEP(I) is not allowed
     !! to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of
     !! the error bound.
     !! DDISNA may also be used to compute error bounds for eigenvectors of
     !! the generalized symmetric definite eigenproblem.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: job
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: m, n
           ! Array Arguments 
           real(dp), intent(in) :: d(*)
           real(dp), intent(out) :: sep(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: decr, eigen, incr, left, right, sing
           integer(${ik}$) :: i, k
           real(dp) :: anorm, eps, newgap, oldgap, safmin, thresh
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           eigen = stdlib_lsame( job, 'E' )
           left = stdlib_lsame( job, 'L' )
           right = stdlib_lsame( job, 'R' )
           sing = left .or. right
           if( eigen ) then
              k = m
           else if( sing ) then
              k = min( m, n )
           end if
           if( .not.eigen .and. .not.sing ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ ) then
              info = -3_${ik}$
           else
              incr = .true.
              decr = .true.
              do i = 1, k - 1
                 if( incr )incr = incr .and. d( i )<=d( i+1 )
                 if( decr )decr = decr .and. d( i )>=d( i+1 )
              end do
              if( sing .and. k>0_${ik}$ ) then
                 if( incr )incr = incr .and. zero<=d( 1_${ik}$ )
                 if( decr )decr = decr .and. d( k )>=zero
              end if
              if( .not.( incr .or. decr ) )info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DDISNA', -info )
              return
           end if
           ! quick return if possible
           if( k==0 )return
           ! compute reciprocal condition numbers
           if( k==1_${ik}$ ) then
              sep( 1_${ik}$ ) = stdlib${ii}$_dlamch( 'O' )
           else
              oldgap = abs( d( 2_${ik}$ )-d( 1_${ik}$ ) )
              sep( 1_${ik}$ ) = oldgap
              do i = 2, k - 1
                 newgap = abs( d( i+1 )-d( i ) )
                 sep( i ) = min( oldgap, newgap )
                 oldgap = newgap
              end do
              sep( k ) = oldgap
           end if
           if( sing ) then
              if( ( left .and. m>n ) .or. ( right .and. m<n ) ) then
                 if( incr )sep( 1_${ik}$ ) = min( sep( 1_${ik}$ ), d( 1_${ik}$ ) )
                 if( decr )sep( k ) = min( sep( k ), d( k ) )
              end if
           end if
           ! ensure that reciprocal condition numbers are not less than
           ! threshold, in order to limit the size of the error bound
           eps = stdlib${ii}$_dlamch( 'E' )
           safmin = stdlib${ii}$_dlamch( 'S' )
           anorm = max( abs( d( 1_${ik}$ ) ), abs( d( k ) ) )
           if( anorm==zero ) then
              thresh = eps
           else
              thresh = max( eps*anorm, safmin )
           end if
           do i = 1, k
              sep( i ) = max( sep( i ), thresh )
           end do
           return
     end subroutine stdlib${ii}$_ddisna

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$disna( job, m, n, d, sep, info )
     !! DDISNA: computes the reciprocal condition numbers for the eigenvectors
     !! of a real symmetric or complex Hermitian matrix or for the left or
     !! right singular vectors of a general m-by-n matrix. The reciprocal
     !! condition number is the 'gap' between the corresponding eigenvalue or
     !! singular value and the nearest other one.
     !! The bound on the error, measured by angle in radians, in the I-th
     !! computed vector is given by
     !! DLAMCH( 'E' ) * ( ANORM / SEP( I ) )
     !! where ANORM = 2-norm(A) = max( abs( D(j) ) ).  SEP(I) is not allowed
     !! to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of
     !! the error bound.
     !! DDISNA may also be used to compute error bounds for eigenvectors of
     !! the generalized symmetric definite eigenproblem.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: job
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: m, n
           ! Array Arguments 
           real(${rk}$), intent(in) :: d(*)
           real(${rk}$), intent(out) :: sep(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: decr, eigen, incr, left, right, sing
           integer(${ik}$) :: i, k
           real(${rk}$) :: anorm, eps, newgap, oldgap, safmin, thresh
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           eigen = stdlib_lsame( job, 'E' )
           left = stdlib_lsame( job, 'L' )
           right = stdlib_lsame( job, 'R' )
           sing = left .or. right
           if( eigen ) then
              k = m
           else if( sing ) then
              k = min( m, n )
           end if
           if( .not.eigen .and. .not.sing ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ ) then
              info = -3_${ik}$
           else
              incr = .true.
              decr = .true.
              do i = 1, k - 1
                 if( incr )incr = incr .and. d( i )<=d( i+1 )
                 if( decr )decr = decr .and. d( i )>=d( i+1 )
              end do
              if( sing .and. k>0_${ik}$ ) then
                 if( incr )incr = incr .and. zero<=d( 1_${ik}$ )
                 if( decr )decr = decr .and. d( k )>=zero
              end if
              if( .not.( incr .or. decr ) )info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DDISNA', -info )
              return
           end if
           ! quick return if possible
           if( k==0 )return
           ! compute reciprocal condition numbers
           if( k==1_${ik}$ ) then
              sep( 1_${ik}$ ) = stdlib${ii}$_${ri}$lamch( 'O' )
           else
              oldgap = abs( d( 2_${ik}$ )-d( 1_${ik}$ ) )
              sep( 1_${ik}$ ) = oldgap
              do i = 2, k - 1
                 newgap = abs( d( i+1 )-d( i ) )
                 sep( i ) = min( oldgap, newgap )
                 oldgap = newgap
              end do
              sep( k ) = oldgap
           end if
           if( sing ) then
              if( ( left .and. m>n ) .or. ( right .and. m<n ) ) then
                 if( incr )sep( 1_${ik}$ ) = min( sep( 1_${ik}$ ), d( 1_${ik}$ ) )
                 if( decr )sep( k ) = min( sep( k ), d( k ) )
              end if
           end if
           ! ensure that reciprocal condition numbers are not less than
           ! threshold, in order to limit the size of the error bound
           eps = stdlib${ii}$_${ri}$lamch( 'E' )
           safmin = stdlib${ii}$_${ri}$lamch( 'S' )
           anorm = max( abs( d( 1_${ik}$ ) ), abs( d( k ) ) )
           if( anorm==zero ) then
              thresh = eps
           else
              thresh = max( eps*anorm, safmin )
           end if
           do i = 1, k
              sep( i ) = max( sep( i ), thresh )
           end do
           return
     end subroutine stdlib${ii}$_${ri}$disna

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slatrd( uplo, n, nb, a, lda, e, tau, w, ldw )
     !! SLATRD reduces NB rows and columns of a real symmetric matrix A to
     !! symmetric tridiagonal form by an orthogonal similarity
     !! transformation Q**T * A * Q, and returns the matrices V and W which are
     !! needed to apply the transformation to the unreduced part of A.
     !! If UPLO = 'U', SLATRD reduces the last NB rows and columns of a
     !! matrix, of which the upper triangle is supplied;
     !! if UPLO = 'L', SLATRD reduces the first NB rows and columns of a
     !! matrix, of which the lower triangle is supplied.
     !! This is an auxiliary routine called by SSYTRD.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(in) :: lda, ldw, n, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: e(*), tau(*), w(ldw,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iw
           real(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n<=0 )return
           if( stdlib_lsame( uplo, 'U' ) ) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if( i<n ) then
                    ! update a(1:i,i)
                    call stdlib${ii}$_sgemv( 'NO TRANSPOSE', i, n-i, -one, a( 1_${ik}$, i+1 ),lda, w( i, iw+1 )&
                              , ldw, one, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'NO TRANSPOSE', i, n-i, -one, w( 1_${ik}$, iw+1 ),ldw, a( i, i+1 )&
                              , lda, one, a( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 if( i>1_${ik}$ ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    call stdlib${ii}$_slarfg( i-1, a( i-1, i ), a( 1_${ik}$, i ), 1_${ik}$, tau( i-1 ) )
                    e( i-1 ) = a( i-1, i )
                    a( i-1, i ) = one
                    ! compute w(1:i-1,i)
                    call stdlib${ii}$_ssymv( 'UPPER', i-1, one, a, lda, a( 1_${ik}$, i ), 1_${ik}$,zero, w( 1_${ik}$, iw ), &
                              1_${ik}$ )
                    if( i<n ) then
                       call stdlib${ii}$_sgemv( 'TRANSPOSE', i-1, n-i, one, w( 1_${ik}$, iw+1 ),ldw, a( 1_${ik}$, i ),&
                                  1_${ik}$, zero, w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_sgemv( 'NO TRANSPOSE', i-1, n-i, -one,a( 1_${ik}$, i+1 ), lda, w( i+1,&
                                  iw ), 1_${ik}$, one,w( 1_${ik}$, iw ), 1_${ik}$ )
                       call stdlib${ii}$_sgemv( 'TRANSPOSE', i-1, n-i, one, a( 1_${ik}$, i+1 ),lda, a( 1_${ik}$, i ), &
                                 1_${ik}$, zero, w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_sgemv( 'NO TRANSPOSE', i-1, n-i, -one,w( 1_${ik}$, iw+1 ), ldw, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, one,w( 1_${ik}$, iw ), 1_${ik}$ )
                    end if
                    call stdlib${ii}$_sscal( i-1, tau( i-1 ), w( 1_${ik}$, iw ), 1_${ik}$ )
                    alpha = -half*tau( i-1 )*stdlib${ii}$_sdot( i-1, w( 1_${ik}$, iw ), 1_${ik}$,a( 1_${ik}$, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_saxpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, w( 1_${ik}$, iw ), 1_${ik}$ )
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              do i = 1, nb
                 ! update a(i:n,i)
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n-i+1, i-1, -one, a( i, 1_${ik}$ ),lda, w( i, 1_${ik}$ ), &
                           ldw, one, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n-i+1, i-1, -one, w( i, 1_${ik}$ ),ldw, a( i, 1_${ik}$ ), &
                           lda, one, a( i, i ), 1_${ik}$ )
                 if( i<n ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    call stdlib${ii}$_slarfg( n-i, a( i+1, i ), a( min( i+2, n ), i ), 1_${ik}$,tau( i ) )
                              
                    e( i ) = a( i+1, i )
                    a( i+1, i ) = one
                    ! compute w(i+1:n,i)
                    call stdlib${ii}$_ssymv( 'LOWER', n-i, one, a( i+1, i+1 ), lda,a( i+1, i ), 1_${ik}$, zero,&
                               w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'TRANSPOSE', n-i, i-1, one, w( i+1, 1_${ik}$ ), ldw,a( i+1, i ), &
                              1_${ik}$, zero, w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n-i, i-1, -one, a( i+1, 1_${ik}$ ),lda, w( 1_${ik}$, i ),&
                               1_${ik}$, one, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'TRANSPOSE', n-i, i-1, one, a( i+1, 1_${ik}$ ), lda,a( i+1, i ), &
                              1_${ik}$, zero, w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n-i, i-1, -one, w( i+1, 1_${ik}$ ),ldw, w( 1_${ik}$, i ),&
                               1_${ik}$, one, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_sscal( n-i, tau( i ), w( i+1, i ), 1_${ik}$ )
                    alpha = -half*tau( i )*stdlib${ii}$_sdot( n-i, w( i+1, i ), 1_${ik}$,a( i+1, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_saxpy( n-i, alpha, a( i+1, i ), 1_${ik}$, w( i+1, i ), 1_${ik}$ )
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_slatrd

     pure module subroutine stdlib${ii}$_dlatrd( uplo, n, nb, a, lda, e, tau, w, ldw )
     !! DLATRD reduces NB rows and columns of a real symmetric matrix A to
     !! symmetric tridiagonal form by an orthogonal similarity
     !! transformation Q**T * A * Q, and returns the matrices V and W which are
     !! needed to apply the transformation to the unreduced part of A.
     !! If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
     !! matrix, of which the upper triangle is supplied;
     !! if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
     !! matrix, of which the lower triangle is supplied.
     !! This is an auxiliary routine called by DSYTRD.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(in) :: lda, ldw, n, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: e(*), tau(*), w(ldw,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iw
           real(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n<=0 )return
           if( stdlib_lsame( uplo, 'U' ) ) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if( i<n ) then
                    ! update a(1:i,i)
                    call stdlib${ii}$_dgemv( 'NO TRANSPOSE', i, n-i, -one, a( 1_${ik}$, i+1 ),lda, w( i, iw+1 )&
                              , ldw, one, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'NO TRANSPOSE', i, n-i, -one, w( 1_${ik}$, iw+1 ),ldw, a( i, i+1 )&
                              , lda, one, a( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 if( i>1_${ik}$ ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    call stdlib${ii}$_dlarfg( i-1, a( i-1, i ), a( 1_${ik}$, i ), 1_${ik}$, tau( i-1 ) )
                    e( i-1 ) = a( i-1, i )
                    a( i-1, i ) = one
                    ! compute w(1:i-1,i)
                    call stdlib${ii}$_dsymv( 'UPPER', i-1, one, a, lda, a( 1_${ik}$, i ), 1_${ik}$,zero, w( 1_${ik}$, iw ), &
                              1_${ik}$ )
                    if( i<n ) then
                       call stdlib${ii}$_dgemv( 'TRANSPOSE', i-1, n-i, one, w( 1_${ik}$, iw+1 ),ldw, a( 1_${ik}$, i ),&
                                  1_${ik}$, zero, w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_dgemv( 'NO TRANSPOSE', i-1, n-i, -one,a( 1_${ik}$, i+1 ), lda, w( i+1,&
                                  iw ), 1_${ik}$, one,w( 1_${ik}$, iw ), 1_${ik}$ )
                       call stdlib${ii}$_dgemv( 'TRANSPOSE', i-1, n-i, one, a( 1_${ik}$, i+1 ),lda, a( 1_${ik}$, i ), &
                                 1_${ik}$, zero, w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_dgemv( 'NO TRANSPOSE', i-1, n-i, -one,w( 1_${ik}$, iw+1 ), ldw, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, one,w( 1_${ik}$, iw ), 1_${ik}$ )
                    end if
                    call stdlib${ii}$_dscal( i-1, tau( i-1 ), w( 1_${ik}$, iw ), 1_${ik}$ )
                    alpha = -half*tau( i-1 )*stdlib${ii}$_ddot( i-1, w( 1_${ik}$, iw ), 1_${ik}$,a( 1_${ik}$, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_daxpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, w( 1_${ik}$, iw ), 1_${ik}$ )
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              do i = 1, nb
                 ! update a(i:n,i)
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n-i+1, i-1, -one, a( i, 1_${ik}$ ),lda, w( i, 1_${ik}$ ), &
                           ldw, one, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n-i+1, i-1, -one, w( i, 1_${ik}$ ),ldw, a( i, 1_${ik}$ ), &
                           lda, one, a( i, i ), 1_${ik}$ )
                 if( i<n ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    call stdlib${ii}$_dlarfg( n-i, a( i+1, i ), a( min( i+2, n ), i ), 1_${ik}$,tau( i ) )
                              
                    e( i ) = a( i+1, i )
                    a( i+1, i ) = one
                    ! compute w(i+1:n,i)
                    call stdlib${ii}$_dsymv( 'LOWER', n-i, one, a( i+1, i+1 ), lda,a( i+1, i ), 1_${ik}$, zero,&
                               w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'TRANSPOSE', n-i, i-1, one, w( i+1, 1_${ik}$ ), ldw,a( i+1, i ), &
                              1_${ik}$, zero, w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n-i, i-1, -one, a( i+1, 1_${ik}$ ),lda, w( 1_${ik}$, i ),&
                               1_${ik}$, one, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'TRANSPOSE', n-i, i-1, one, a( i+1, 1_${ik}$ ), lda,a( i+1, i ), &
                              1_${ik}$, zero, w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n-i, i-1, -one, w( i+1, 1_${ik}$ ),ldw, w( 1_${ik}$, i ),&
                               1_${ik}$, one, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_dscal( n-i, tau( i ), w( i+1, i ), 1_${ik}$ )
                    alpha = -half*tau( i )*stdlib${ii}$_ddot( n-i, w( i+1, i ), 1_${ik}$,a( i+1, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_daxpy( n-i, alpha, a( i+1, i ), 1_${ik}$, w( i+1, i ), 1_${ik}$ )
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_dlatrd

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$latrd( uplo, n, nb, a, lda, e, tau, w, ldw )
     !! DLATRD: reduces NB rows and columns of a real symmetric matrix A to
     !! symmetric tridiagonal form by an orthogonal similarity
     !! transformation Q**T * A * Q, and returns the matrices V and W which are
     !! needed to apply the transformation to the unreduced part of A.
     !! If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
     !! matrix, of which the upper triangle is supplied;
     !! if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
     !! matrix, of which the lower triangle is supplied.
     !! This is an auxiliary routine called by DSYTRD.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(in) :: lda, ldw, n, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: e(*), tau(*), w(ldw,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iw
           real(${rk}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n<=0 )return
           if( stdlib_lsame( uplo, 'U' ) ) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if( i<n ) then
                    ! update a(1:i,i)
                    call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', i, n-i, -one, a( 1_${ik}$, i+1 ),lda, w( i, iw+1 )&
                              , ldw, one, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', i, n-i, -one, w( 1_${ik}$, iw+1 ),ldw, a( i, i+1 )&
                              , lda, one, a( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 if( i>1_${ik}$ ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    call stdlib${ii}$_${ri}$larfg( i-1, a( i-1, i ), a( 1_${ik}$, i ), 1_${ik}$, tau( i-1 ) )
                    e( i-1 ) = a( i-1, i )
                    a( i-1, i ) = one
                    ! compute w(1:i-1,i)
                    call stdlib${ii}$_${ri}$symv( 'UPPER', i-1, one, a, lda, a( 1_${ik}$, i ), 1_${ik}$,zero, w( 1_${ik}$, iw ), &
                              1_${ik}$ )
                    if( i<n ) then
                       call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', i-1, n-i, one, w( 1_${ik}$, iw+1 ),ldw, a( 1_${ik}$, i ),&
                                  1_${ik}$, zero, w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', i-1, n-i, -one,a( 1_${ik}$, i+1 ), lda, w( i+1,&
                                  iw ), 1_${ik}$, one,w( 1_${ik}$, iw ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', i-1, n-i, one, a( 1_${ik}$, i+1 ),lda, a( 1_${ik}$, i ), &
                                 1_${ik}$, zero, w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', i-1, n-i, -one,w( 1_${ik}$, iw+1 ), ldw, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, one,w( 1_${ik}$, iw ), 1_${ik}$ )
                    end if
                    call stdlib${ii}$_${ri}$scal( i-1, tau( i-1 ), w( 1_${ik}$, iw ), 1_${ik}$ )
                    alpha = -half*tau( i-1 )*stdlib${ii}$_${ri}$dot( i-1, w( 1_${ik}$, iw ), 1_${ik}$,a( 1_${ik}$, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_${ri}$axpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, w( 1_${ik}$, iw ), 1_${ik}$ )
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              do i = 1, nb
                 ! update a(i:n,i)
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n-i+1, i-1, -one, a( i, 1_${ik}$ ),lda, w( i, 1_${ik}$ ), &
                           ldw, one, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n-i+1, i-1, -one, w( i, 1_${ik}$ ),ldw, a( i, 1_${ik}$ ), &
                           lda, one, a( i, i ), 1_${ik}$ )
                 if( i<n ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    call stdlib${ii}$_${ri}$larfg( n-i, a( i+1, i ), a( min( i+2, n ), i ), 1_${ik}$,tau( i ) )
                              
                    e( i ) = a( i+1, i )
                    a( i+1, i ) = one
                    ! compute w(i+1:n,i)
                    call stdlib${ii}$_${ri}$symv( 'LOWER', n-i, one, a( i+1, i+1 ), lda,a( i+1, i ), 1_${ik}$, zero,&
                               w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', n-i, i-1, one, w( i+1, 1_${ik}$ ), ldw,a( i+1, i ), &
                              1_${ik}$, zero, w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n-i, i-1, -one, a( i+1, 1_${ik}$ ),lda, w( 1_${ik}$, i ),&
                               1_${ik}$, one, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', n-i, i-1, one, a( i+1, 1_${ik}$ ), lda,a( i+1, i ), &
                              1_${ik}$, zero, w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n-i, i-1, -one, w( i+1, 1_${ik}$ ),ldw, w( 1_${ik}$, i ),&
                               1_${ik}$, one, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$scal( n-i, tau( i ), w( i+1, i ), 1_${ik}$ )
                    alpha = -half*tau( i )*stdlib${ii}$_${ri}$dot( n-i, w( i+1, i ), 1_${ik}$,a( i+1, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_${ri}$axpy( n-i, alpha, a( i+1, i ), 1_${ik}$, w( i+1, i ), 1_${ik}$ )
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$latrd

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clatrd( uplo, n, nb, a, lda, e, tau, w, ldw )
     !! CLATRD reduces NB rows and columns of a complex Hermitian matrix A to
     !! Hermitian tridiagonal form by a unitary similarity
     !! transformation Q**H * A * Q, and returns the matrices V and W which are
     !! needed to apply the transformation to the unreduced part of A.
     !! If UPLO = 'U', CLATRD reduces the last NB rows and columns of a
     !! matrix, of which the upper triangle is supplied;
     !! if UPLO = 'L', CLATRD reduces the first NB rows and columns of a
     !! matrix, of which the lower triangle is supplied.
     !! This is an auxiliary routine called by CHETRD.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(in) :: lda, ldw, n, nb
           ! Array Arguments 
           real(sp), intent(out) :: e(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), w(ldw,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iw
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n<=0 )return
           if( stdlib_lsame( uplo, 'U' ) ) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if( i<n ) then
                    ! update a(1:i,i)
                    a( i, i ) = real( a( i, i ),KIND=sp)
                    call stdlib${ii}$_clacgv( n-i, w( i, iw+1 ), ldw )
                    call stdlib${ii}$_cgemv( 'NO TRANSPOSE', i, n-i, -cone, a( 1_${ik}$, i+1 ),lda, w( i, iw+1 &
                              ), ldw, cone, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_clacgv( n-i, w( i, iw+1 ), ldw )
                    call stdlib${ii}$_clacgv( n-i, a( i, i+1 ), lda )
                    call stdlib${ii}$_cgemv( 'NO TRANSPOSE', i, n-i, -cone, w( 1_${ik}$, iw+1 ),ldw, a( i, i+1 &
                              ), lda, cone, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_clacgv( n-i, a( i, i+1 ), lda )
                    a( i, i ) = real( a( i, i ),KIND=sp)
                 end if
                 if( i>1_${ik}$ ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    alpha = a( i-1, i )
                    call stdlib${ii}$_clarfg( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, tau( i-1 ) )
                    e( i-1 ) = real( alpha,KIND=sp)
                    a( i-1, i ) = cone
                    ! compute w(1:i-1,i)
                    call stdlib${ii}$_chemv( 'UPPER', i-1, cone, a, lda, a( 1_${ik}$, i ), 1_${ik}$,czero, w( 1_${ik}$, iw ),&
                               1_${ik}$ )
                    if( i<n ) then
                       call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', i-1, n-i, cone,w( 1_${ik}$, iw+1 ), ldw,&
                                  a( 1_${ik}$, i ), 1_${ik}$, czero,w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_cgemv( 'NO TRANSPOSE', i-1, n-i, -cone,a( 1_${ik}$, i+1 ), lda, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, cone,w( 1_${ik}$, iw ), 1_${ik}$ )
                       call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', i-1, n-i, cone,a( 1_${ik}$, i+1 ), lda, &
                                 a( 1_${ik}$, i ), 1_${ik}$, czero,w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_cgemv( 'NO TRANSPOSE', i-1, n-i, -cone,w( 1_${ik}$, iw+1 ), ldw, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, cone,w( 1_${ik}$, iw ), 1_${ik}$ )
                    end if
                    call stdlib${ii}$_cscal( i-1, tau( i-1 ), w( 1_${ik}$, iw ), 1_${ik}$ )
                    alpha = -chalf*tau( i-1 )*stdlib${ii}$_cdotc( i-1, w( 1_${ik}$, iw ), 1_${ik}$,a( 1_${ik}$, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_caxpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, w( 1_${ik}$, iw ), 1_${ik}$ )
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              loop_20: do i = 1, nb
                 ! update a(i:n,i)
                 a( i, i ) = real( a( i, i ),KIND=sp)
                 call stdlib${ii}$_clacgv( i-1, w( i, 1_${ik}$ ), ldw )
                 call stdlib${ii}$_cgemv( 'NO TRANSPOSE', n-i+1, i-1, -cone, a( i, 1_${ik}$ ),lda, w( i, 1_${ik}$ ), &
                           ldw, cone, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_clacgv( i-1, w( i, 1_${ik}$ ), ldw )
                 call stdlib${ii}$_clacgv( i-1, a( i, 1_${ik}$ ), lda )
                 call stdlib${ii}$_cgemv( 'NO TRANSPOSE', n-i+1, i-1, -cone, w( i, 1_${ik}$ ),ldw, a( i, 1_${ik}$ ), &
                           lda, cone, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_clacgv( i-1, a( i, 1_${ik}$ ), lda )
                 a( i, i ) = real( a( i, i ),KIND=sp)
                 if( i<n ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    alpha = a( i+1, i )
                    call stdlib${ii}$_clarfg( n-i, alpha, a( min( i+2, n ), i ), 1_${ik}$,tau( i ) )
                    e( i ) = real( alpha,KIND=sp)
                    a( i+1, i ) = cone
                    ! compute w(i+1:n,i)
                    call stdlib${ii}$_chemv( 'LOWER', n-i, cone, a( i+1, i+1 ), lda,a( i+1, i ), 1_${ik}$, &
                              czero, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', n-i, i-1, cone,w( i+1, 1_${ik}$ ), ldw, a( &
                              i+1, i ), 1_${ik}$, czero,w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_cgemv( 'NO TRANSPOSE', n-i, i-1, -cone, a( i+1, 1_${ik}$ ),lda, w( 1_${ik}$, i )&
                              , 1_${ik}$, cone, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', n-i, i-1, cone,a( i+1, 1_${ik}$ ), lda, a( &
                              i+1, i ), 1_${ik}$, czero,w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_cgemv( 'NO TRANSPOSE', n-i, i-1, -cone, w( i+1, 1_${ik}$ ),ldw, w( 1_${ik}$, i )&
                              , 1_${ik}$, cone, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_cscal( n-i, tau( i ), w( i+1, i ), 1_${ik}$ )
                    alpha = -chalf*tau( i )*stdlib${ii}$_cdotc( n-i, w( i+1, i ), 1_${ik}$,a( i+1, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_caxpy( n-i, alpha, a( i+1, i ), 1_${ik}$, w( i+1, i ), 1_${ik}$ )
                 end if
              end do loop_20
           end if
           return
     end subroutine stdlib${ii}$_clatrd

     pure module subroutine stdlib${ii}$_zlatrd( uplo, n, nb, a, lda, e, tau, w, ldw )
     !! ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to
     !! Hermitian tridiagonal form by a unitary similarity
     !! transformation Q**H * A * Q, and returns the matrices V and W which are
     !! needed to apply the transformation to the unreduced part of A.
     !! If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a
     !! matrix, of which the upper triangle is supplied;
     !! if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a
     !! matrix, of which the lower triangle is supplied.
     !! This is an auxiliary routine called by ZHETRD.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(in) :: lda, ldw, n, nb
           ! Array Arguments 
           real(dp), intent(out) :: e(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), w(ldw,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iw
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n<=0 )return
           if( stdlib_lsame( uplo, 'U' ) ) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if( i<n ) then
                    ! update a(1:i,i)
                    a( i, i ) = real( a( i, i ),KIND=dp)
                    call stdlib${ii}$_zlacgv( n-i, w( i, iw+1 ), ldw )
                    call stdlib${ii}$_zgemv( 'NO TRANSPOSE', i, n-i, -cone, a( 1_${ik}$, i+1 ),lda, w( i, iw+1 &
                              ), ldw, cone, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_zlacgv( n-i, w( i, iw+1 ), ldw )
                    call stdlib${ii}$_zlacgv( n-i, a( i, i+1 ), lda )
                    call stdlib${ii}$_zgemv( 'NO TRANSPOSE', i, n-i, -cone, w( 1_${ik}$, iw+1 ),ldw, a( i, i+1 &
                              ), lda, cone, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_zlacgv( n-i, a( i, i+1 ), lda )
                    a( i, i ) = real( a( i, i ),KIND=dp)
                 end if
                 if( i>1_${ik}$ ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    alpha = a( i-1, i )
                    call stdlib${ii}$_zlarfg( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, tau( i-1 ) )
                    e( i-1 ) = real( alpha,KIND=dp)
                    a( i-1, i ) = cone
                    ! compute w(1:i-1,i)
                    call stdlib${ii}$_zhemv( 'UPPER', i-1, cone, a, lda, a( 1_${ik}$, i ), 1_${ik}$,czero, w( 1_${ik}$, iw ),&
                               1_${ik}$ )
                    if( i<n ) then
                       call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', i-1, n-i, cone,w( 1_${ik}$, iw+1 ), ldw,&
                                  a( 1_${ik}$, i ), 1_${ik}$, czero,w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_zgemv( 'NO TRANSPOSE', i-1, n-i, -cone,a( 1_${ik}$, i+1 ), lda, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, cone,w( 1_${ik}$, iw ), 1_${ik}$ )
                       call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', i-1, n-i, cone,a( 1_${ik}$, i+1 ), lda, &
                                 a( 1_${ik}$, i ), 1_${ik}$, czero,w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_zgemv( 'NO TRANSPOSE', i-1, n-i, -cone,w( 1_${ik}$, iw+1 ), ldw, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, cone,w( 1_${ik}$, iw ), 1_${ik}$ )
                    end if
                    call stdlib${ii}$_zscal( i-1, tau( i-1 ), w( 1_${ik}$, iw ), 1_${ik}$ )
                    alpha = -chalf*tau( i-1 )*stdlib${ii}$_zdotc( i-1, w( 1_${ik}$, iw ), 1_${ik}$,a( 1_${ik}$, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_zaxpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, w( 1_${ik}$, iw ), 1_${ik}$ )
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              loop_20: do i = 1, nb
                 ! update a(i:n,i)
                 a( i, i ) = real( a( i, i ),KIND=dp)
                 call stdlib${ii}$_zlacgv( i-1, w( i, 1_${ik}$ ), ldw )
                 call stdlib${ii}$_zgemv( 'NO TRANSPOSE', n-i+1, i-1, -cone, a( i, 1_${ik}$ ),lda, w( i, 1_${ik}$ ), &
                           ldw, cone, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_zlacgv( i-1, w( i, 1_${ik}$ ), ldw )
                 call stdlib${ii}$_zlacgv( i-1, a( i, 1_${ik}$ ), lda )
                 call stdlib${ii}$_zgemv( 'NO TRANSPOSE', n-i+1, i-1, -cone, w( i, 1_${ik}$ ),ldw, a( i, 1_${ik}$ ), &
                           lda, cone, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_zlacgv( i-1, a( i, 1_${ik}$ ), lda )
                 a( i, i ) = real( a( i, i ),KIND=dp)
                 if( i<n ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    alpha = a( i+1, i )
                    call stdlib${ii}$_zlarfg( n-i, alpha, a( min( i+2, n ), i ), 1_${ik}$,tau( i ) )
                    e( i ) = real( alpha,KIND=dp)
                    a( i+1, i ) = cone
                    ! compute w(i+1:n,i)
                    call stdlib${ii}$_zhemv( 'LOWER', n-i, cone, a( i+1, i+1 ), lda,a( i+1, i ), 1_${ik}$, &
                              czero, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', n-i, i-1, cone,w( i+1, 1_${ik}$ ), ldw, a( &
                              i+1, i ), 1_${ik}$, czero,w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_zgemv( 'NO TRANSPOSE', n-i, i-1, -cone, a( i+1, 1_${ik}$ ),lda, w( 1_${ik}$, i )&
                              , 1_${ik}$, cone, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', n-i, i-1, cone,a( i+1, 1_${ik}$ ), lda, a( &
                              i+1, i ), 1_${ik}$, czero,w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_zgemv( 'NO TRANSPOSE', n-i, i-1, -cone, w( i+1, 1_${ik}$ ),ldw, w( 1_${ik}$, i )&
                              , 1_${ik}$, cone, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_zscal( n-i, tau( i ), w( i+1, i ), 1_${ik}$ )
                    alpha = -chalf*tau( i )*stdlib${ii}$_zdotc( n-i, w( i+1, i ), 1_${ik}$,a( i+1, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_zaxpy( n-i, alpha, a( i+1, i ), 1_${ik}$, w( i+1, i ), 1_${ik}$ )
                 end if
              end do loop_20
           end if
           return
     end subroutine stdlib${ii}$_zlatrd

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$latrd( uplo, n, nb, a, lda, e, tau, w, ldw )
     !! ZLATRD: reduces NB rows and columns of a complex Hermitian matrix A to
     !! Hermitian tridiagonal form by a unitary similarity
     !! transformation Q**H * A * Q, and returns the matrices V and W which are
     !! needed to apply the transformation to the unreduced part of A.
     !! If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a
     !! matrix, of which the upper triangle is supplied;
     !! if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a
     !! matrix, of which the lower triangle is supplied.
     !! This is an auxiliary routine called by ZHETRD.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(in) :: lda, ldw, n, nb
           ! Array Arguments 
           real(${ck}$), intent(out) :: e(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), w(ldw,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iw
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n<=0 )return
           if( stdlib_lsame( uplo, 'U' ) ) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if( i<n ) then
                    ! update a(1:i,i)
                    a( i, i ) = real( a( i, i ),KIND=${ck}$)
                    call stdlib${ii}$_${ci}$lacgv( n-i, w( i, iw+1 ), ldw )
                    call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', i, n-i, -cone, a( 1_${ik}$, i+1 ),lda, w( i, iw+1 &
                              ), ldw, cone, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$lacgv( n-i, w( i, iw+1 ), ldw )
                    call stdlib${ii}$_${ci}$lacgv( n-i, a( i, i+1 ), lda )
                    call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', i, n-i, -cone, w( 1_${ik}$, iw+1 ),ldw, a( i, i+1 &
                              ), lda, cone, a( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$lacgv( n-i, a( i, i+1 ), lda )
                    a( i, i ) = real( a( i, i ),KIND=${ck}$)
                 end if
                 if( i>1_${ik}$ ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    alpha = a( i-1, i )
                    call stdlib${ii}$_${ci}$larfg( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, tau( i-1 ) )
                    e( i-1 ) = real( alpha,KIND=${ck}$)
                    a( i-1, i ) = cone
                    ! compute w(1:i-1,i)
                    call stdlib${ii}$_${ci}$hemv( 'UPPER', i-1, cone, a, lda, a( 1_${ik}$, i ), 1_${ik}$,czero, w( 1_${ik}$, iw ),&
                               1_${ik}$ )
                    if( i<n ) then
                       call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', i-1, n-i, cone,w( 1_${ik}$, iw+1 ), ldw,&
                                  a( 1_${ik}$, i ), 1_${ik}$, czero,w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', i-1, n-i, -cone,a( 1_${ik}$, i+1 ), lda, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, cone,w( 1_${ik}$, iw ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', i-1, n-i, cone,a( 1_${ik}$, i+1 ), lda, &
                                 a( 1_${ik}$, i ), 1_${ik}$, czero,w( i+1, iw ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', i-1, n-i, -cone,w( 1_${ik}$, iw+1 ), ldw, w( i+&
                                 1_${ik}$, iw ), 1_${ik}$, cone,w( 1_${ik}$, iw ), 1_${ik}$ )
                    end if
                    call stdlib${ii}$_${ci}$scal( i-1, tau( i-1 ), w( 1_${ik}$, iw ), 1_${ik}$ )
                    alpha = -chalf*tau( i-1 )*stdlib${ii}$_${ci}$dotc( i-1, w( 1_${ik}$, iw ), 1_${ik}$,a( 1_${ik}$, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_${ci}$axpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, w( 1_${ik}$, iw ), 1_${ik}$ )
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              loop_20: do i = 1, nb
                 ! update a(i:n,i)
                 a( i, i ) = real( a( i, i ),KIND=${ck}$)
                 call stdlib${ii}$_${ci}$lacgv( i-1, w( i, 1_${ik}$ ), ldw )
                 call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', n-i+1, i-1, -cone, a( i, 1_${ik}$ ),lda, w( i, 1_${ik}$ ), &
                           ldw, cone, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$lacgv( i-1, w( i, 1_${ik}$ ), ldw )
                 call stdlib${ii}$_${ci}$lacgv( i-1, a( i, 1_${ik}$ ), lda )
                 call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', n-i+1, i-1, -cone, w( i, 1_${ik}$ ),ldw, a( i, 1_${ik}$ ), &
                           lda, cone, a( i, i ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$lacgv( i-1, a( i, 1_${ik}$ ), lda )
                 a( i, i ) = real( a( i, i ),KIND=${ck}$)
                 if( i<n ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    alpha = a( i+1, i )
                    call stdlib${ii}$_${ci}$larfg( n-i, alpha, a( min( i+2, n ), i ), 1_${ik}$,tau( i ) )
                    e( i ) = real( alpha,KIND=${ck}$)
                    a( i+1, i ) = cone
                    ! compute w(i+1:n,i)
                    call stdlib${ii}$_${ci}$hemv( 'LOWER', n-i, cone, a( i+1, i+1 ), lda,a( i+1, i ), 1_${ik}$, &
                              czero, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', n-i, i-1, cone,w( i+1, 1_${ik}$ ), ldw, a( &
                              i+1, i ), 1_${ik}$, czero,w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', n-i, i-1, -cone, a( i+1, 1_${ik}$ ),lda, w( 1_${ik}$, i )&
                              , 1_${ik}$, cone, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', n-i, i-1, cone,a( i+1, 1_${ik}$ ), lda, a( &
                              i+1, i ), 1_${ik}$, czero,w( 1_${ik}$, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', n-i, i-1, -cone, w( i+1, 1_${ik}$ ),ldw, w( 1_${ik}$, i )&
                              , 1_${ik}$, cone, w( i+1, i ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$scal( n-i, tau( i ), w( i+1, i ), 1_${ik}$ )
                    alpha = -chalf*tau( i )*stdlib${ii}$_${ci}$dotc( n-i, w( i+1, i ), 1_${ik}$,a( i+1, i ), 1_${ik}$ )
                              
                    call stdlib${ii}$_${ci}$axpy( n-i, alpha, a( i+1, i ), 1_${ik}$, w( i+1, i ), 1_${ik}$ )
                 end if
              end do loop_20
           end if
           return
     end subroutine stdlib${ii}$_${ci}$latrd

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slae2( a, b, c, rt1, rt2 )
     !! SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix
     !! [  A   B  ]
     !! [  B   C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, and RT2
     !! is the eigenvalue of smaller absolute value.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(sp), intent(in) :: a, b, c
           real(sp), intent(out) :: rt1, rt2
       ! =====================================================================
           
           
           
           
           ! Local Scalars 
           real(sp) :: ab, acmn, acmx, adf, df, rt, sm, tb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs( df )
           tb = b + b
           ab = abs( tb )
           if( abs( a )>abs( c ) ) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if( adf>ab ) then
              rt = adf*sqrt( one+( ab / adf )**2_${ik}$ )
           else if( adf<ab ) then
              rt = ab*sqrt( one+( adf / ab )**2_${ik}$ )
           else
              ! includes case ab=adf=0
              rt = ab*sqrt( two )
           end if
           if( sm<zero ) then
              rt1 = half*( sm-rt )
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else if( sm>zero ) then
              rt1 = half*( sm+rt )
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
           end if
           return
     end subroutine stdlib${ii}$_slae2

     pure module subroutine stdlib${ii}$_dlae2( a, b, c, rt1, rt2 )
     !! DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix
     !! [  A   B  ]
     !! [  B   C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, and RT2
     !! is the eigenvalue of smaller absolute value.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(dp), intent(in) :: a, b, c
           real(dp), intent(out) :: rt1, rt2
       ! =====================================================================
           
           
           
           
           ! Local Scalars 
           real(dp) :: ab, acmn, acmx, adf, df, rt, sm, tb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs( df )
           tb = b + b
           ab = abs( tb )
           if( abs( a )>abs( c ) ) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if( adf>ab ) then
              rt = adf*sqrt( one+( ab / adf )**2_${ik}$ )
           else if( adf<ab ) then
              rt = ab*sqrt( one+( adf / ab )**2_${ik}$ )
           else
              ! includes case ab=adf=0
              rt = ab*sqrt( two )
           end if
           if( sm<zero ) then
              rt1 = half*( sm-rt )
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else if( sm>zero ) then
              rt1 = half*( sm+rt )
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
           end if
           return
     end subroutine stdlib${ii}$_dlae2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lae2( a, b, c, rt1, rt2 )
     !! DLAE2:  computes the eigenvalues of a 2-by-2 symmetric matrix
     !! [  A   B  ]
     !! [  B   C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, and RT2
     !! is the eigenvalue of smaller absolute value.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(${rk}$), intent(in) :: a, b, c
           real(${rk}$), intent(out) :: rt1, rt2
       ! =====================================================================
           
           
           
           
           ! Local Scalars 
           real(${rk}$) :: ab, acmn, acmx, adf, df, rt, sm, tb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs( df )
           tb = b + b
           ab = abs( tb )
           if( abs( a )>abs( c ) ) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if( adf>ab ) then
              rt = adf*sqrt( one+( ab / adf )**2_${ik}$ )
           else if( adf<ab ) then
              rt = ab*sqrt( one+( adf / ab )**2_${ik}$ )
           else
              ! includes case ab=adf=0
              rt = ab*sqrt( two )
           end if
           if( sm<zero ) then
              rt1 = half*( sm-rt )
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else if( sm>zero ) then
              rt1 = half*( sm+rt )
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
           end if
           return
     end subroutine stdlib${ii}$_${ri}$lae2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_claesy( a, b, c, rt1, rt2, evscal, cs1, sn1 )
     !! CLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix
     !! ( ( A, B );( B, C ) )
     !! provided the norm of the matrix of eigenvectors is larger than
     !! some threshold value.
     !! RT1 is the eigenvalue of larger absolute value, and RT2 of
     !! smaller absolute value.  If the eigenvectors are computed, then
     !! on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence
     !! [  CS1     SN1   ] . [ A  B ] . [ CS1    -SN1   ] = [ RT1  0  ]
     !! [ -SN1     CS1   ]   [ B  C ]   [ SN1     CS1   ]   [  0  RT2 ]
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           complex(sp), intent(in) :: a, b, c
           complex(sp), intent(out) :: cs1, evscal, rt1, rt2, sn1
       ! =====================================================================
           ! Parameters 
           real(sp), parameter :: thresh = 0.1_sp
           
           
           
           
           
           ! Local Scalars 
           real(sp) :: babs, evnorm, tabs, z
           complex(sp) :: s, t, tmp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! special case:  the matrix is actually diagonal.
           ! to avoid divide by zero later, we treat this case separately.
           if( abs( b )==zero ) then
              rt1 = a
              rt2 = c
              if( abs( rt1 )<abs( rt2 ) ) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
                 cs1 = zero
                 sn1 = one
              else
                 cs1 = one
                 sn1 = zero
              end if
           else
              ! compute the eigenvalues and eigenvectors.
              ! the characteristic equation is
                 ! lambda **2 - (a+c) lambda + (a*c - b*b)
              ! and we solve it using the quadratic formula.
              s = ( a+c )*half
              t = ( a-c )*half
              ! take the square root carefully to avoid over/under flow.
              babs = abs( b )
              tabs = abs( t )
              z = max( babs, tabs )
              if( z>zero )t = z*sqrt( ( t / z )**2_${ik}$+( b / z )**2_${ik}$ )
              ! compute the two eigenvalues.  rt1 and rt2 are exchanged
              ! if necessary so that rt1 will have the greater magnitude.
              rt1 = s + t
              rt2 = s - t
              if( abs( rt1 )<abs( rt2 ) ) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
              end if
              ! choose cs1 = 1 and sn1 to satisfy the first equation, then
              ! scale the components of this eigenvector so that the matrix
              ! of eigenvectors x satisfies  x * x**t = i .  (no scaling is
              ! done if the norm of the eigenvalue matrix is less than thresh.)
              sn1 = ( rt1-a ) / b
              tabs = abs( sn1 )
              if( tabs>one ) then
                 t = tabs*sqrt( ( one / tabs )**2_${ik}$+( sn1 / tabs )**2_${ik}$ )
              else
                 t = sqrt( cone+sn1*sn1 )
              end if
              evnorm = abs( t )
              if( evnorm>=thresh ) then
                 evscal = cone / t
                 cs1 = evscal
                 sn1 = sn1*evscal
              else
                 evscal = zero
              end if
           end if
           return
     end subroutine stdlib${ii}$_claesy

     pure module subroutine stdlib${ii}$_zlaesy( a, b, c, rt1, rt2, evscal, cs1, sn1 )
     !! ZLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix
     !! ( ( A, B );( B, C ) )
     !! provided the norm of the matrix of eigenvectors is larger than
     !! some threshold value.
     !! RT1 is the eigenvalue of larger absolute value, and RT2 of
     !! smaller absolute value.  If the eigenvectors are computed, then
     !! on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence
     !! [  CS1     SN1   ] . [ A  B ] . [ CS1    -SN1   ] = [ RT1  0  ]
     !! [ -SN1     CS1   ]   [ B  C ]   [ SN1     CS1   ]   [  0  RT2 ]
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           complex(dp), intent(in) :: a, b, c
           complex(dp), intent(out) :: cs1, evscal, rt1, rt2, sn1
       ! =====================================================================
           ! Parameters 
           real(dp), parameter :: thresh = 0.1_dp
           
           
           
           
           
           ! Local Scalars 
           real(dp) :: babs, evnorm, tabs, z
           complex(dp) :: s, t, tmp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! special case:  the matrix is actually diagonal.
           ! to avoid divide by zero later, we treat this case separately.
           if( abs( b )==zero ) then
              rt1 = a
              rt2 = c
              if( abs( rt1 )<abs( rt2 ) ) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
                 cs1 = zero
                 sn1 = one
              else
                 cs1 = one
                 sn1 = zero
              end if
           else
              ! compute the eigenvalues and eigenvectors.
              ! the characteristic equation is
                 ! lambda **2 - (a+c) lambda + (a*c - b*b)
              ! and we solve it using the quadratic formula.
              s = ( a+c )*half
              t = ( a-c )*half
              ! take the square root carefully to avoid over/under flow.
              babs = abs( b )
              tabs = abs( t )
              z = max( babs, tabs )
              if( z>zero )t = z*sqrt( ( t / z )**2_${ik}$+( b / z )**2_${ik}$ )
              ! compute the two eigenvalues.  rt1 and rt2 are exchanged
              ! if necessary so that rt1 will have the greater magnitude.
              rt1 = s + t
              rt2 = s - t
              if( abs( rt1 )<abs( rt2 ) ) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
              end if
              ! choose cs1 = 1 and sn1 to satisfy the first equation, then
              ! scale the components of this eigenvector so that the matrix
              ! of eigenvectors x satisfies  x * x**t = i .  (no scaling is
              ! done if the norm of the eigenvalue matrix is less than thresh.)
              sn1 = ( rt1-a ) / b
              tabs = abs( sn1 )
              if( tabs>one ) then
                 t = tabs*sqrt( ( one / tabs )**2_${ik}$+( sn1 / tabs )**2_${ik}$ )
              else
                 t = sqrt( cone+sn1*sn1 )
              end if
              evnorm = abs( t )
              if( evnorm>=thresh ) then
                 evscal = cone / t
                 cs1 = evscal
                 sn1 = sn1*evscal
              else
                 evscal = zero
              end if
           end if
           return
     end subroutine stdlib${ii}$_zlaesy

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laesy( a, b, c, rt1, rt2, evscal, cs1, sn1 )
     !! ZLAESY: computes the eigendecomposition of a 2-by-2 symmetric matrix
     !! ( ( A, B );( B, C ) )
     !! provided the norm of the matrix of eigenvectors is larger than
     !! some threshold value.
     !! RT1 is the eigenvalue of larger absolute value, and RT2 of
     !! smaller absolute value.  If the eigenvectors are computed, then
     !! on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence
     !! [  CS1     SN1   ] . [ A  B ] . [ CS1    -SN1   ] = [ RT1  0  ]
     !! [ -SN1     CS1   ]   [ B  C ]   [ SN1     CS1   ]   [  0  RT2 ]
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           complex(${ck}$), intent(in) :: a, b, c
           complex(${ck}$), intent(out) :: cs1, evscal, rt1, rt2, sn1
       ! =====================================================================
           ! Parameters 
           real(${ck}$), parameter :: thresh = 0.1_${ck}$
           
           
           
           
           
           ! Local Scalars 
           real(${ck}$) :: babs, evnorm, tabs, z
           complex(${ck}$) :: s, t, tmp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! special case:  the matrix is actually diagonal.
           ! to avoid divide by zero later, we treat this case separately.
           if( abs( b )==zero ) then
              rt1 = a
              rt2 = c
              if( abs( rt1 )<abs( rt2 ) ) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
                 cs1 = zero
                 sn1 = one
              else
                 cs1 = one
                 sn1 = zero
              end if
           else
              ! compute the eigenvalues and eigenvectors.
              ! the characteristic equation is
                 ! lambda **2 - (a+c) lambda + (a*c - b*b)
              ! and we solve it using the quadratic formula.
              s = ( a+c )*half
              t = ( a-c )*half
              ! take the square root carefully to avoid over/under flow.
              babs = abs( b )
              tabs = abs( t )
              z = max( babs, tabs )
              if( z>zero )t = z*sqrt( ( t / z )**2_${ik}$+( b / z )**2_${ik}$ )
              ! compute the two eigenvalues.  rt1 and rt2 are exchanged
              ! if necessary so that rt1 will have the greater magnitude.
              rt1 = s + t
              rt2 = s - t
              if( abs( rt1 )<abs( rt2 ) ) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
              end if
              ! choose cs1 = 1 and sn1 to satisfy the first equation, then
              ! scale the components of this eigenvector so that the matrix
              ! of eigenvectors x satisfies  x * x**t = i .  (no scaling is
              ! done if the norm of the eigenvalue matrix is less than thresh.)
              sn1 = ( rt1-a ) / b
              tabs = abs( sn1 )
              if( tabs>one ) then
                 t = tabs*sqrt( ( one / tabs )**2_${ik}$+( sn1 / tabs )**2_${ik}$ )
              else
                 t = sqrt( cone+sn1*sn1 )
              end if
              evnorm = abs( t )
              if( evnorm>=thresh ) then
                 evscal = cone / t
                 cs1 = evscal
                 sn1 = sn1*evscal
              else
                 evscal = zero
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ci}$laesy

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaev2( a, b, c, rt1, rt2, cs1, sn1 )
     !! SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
     !! [  A   B  ]
     !! [  B   C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     !! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     !! eigenvector for RT1, giving the decomposition
     !! [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
     !! [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(sp), intent(in) :: a, b, c
           real(sp), intent(out) :: cs1, rt1, rt2, sn1
       ! =====================================================================
           
           
           
           
           ! Local Scalars 
           integer(${ik}$) :: sgn1, sgn2
           real(sp) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs( df )
           tb = b + b
           ab = abs( tb )
           if( abs( a )>abs( c ) ) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if( adf>ab ) then
              rt = adf*sqrt( one+( ab / adf )**2_${ik}$ )
           else if( adf<ab ) then
              rt = ab*sqrt( one+( adf / ab )**2_${ik}$ )
           else
              ! includes case ab=adf=0
              rt = ab*sqrt( two )
           end if
           if( sm<zero ) then
              rt1 = half*( sm-rt )
              sgn1 = -1_${ik}$
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else if( sm>zero ) then
              rt1 = half*( sm+rt )
              sgn1 = 1_${ik}$
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
              sgn1 = 1_${ik}$
           end if
           ! compute the eigenvector
           if( df>=zero ) then
              cs = df + rt
              sgn2 = 1_${ik}$
           else
              cs = df - rt
              sgn2 = -1_${ik}$
           end if
           acs = abs( cs )
           if( acs>ab ) then
              ct = -tb / cs
              sn1 = one / sqrt( one+ct*ct )
              cs1 = ct*sn1
           else
              if( ab==zero ) then
                 cs1 = one
                 sn1 = zero
              else
                 tn = -cs / tb
                 cs1 = one / sqrt( one+tn*tn )
                 sn1 = tn*cs1
              end if
           end if
           if( sgn1==sgn2 ) then
              tn = cs1
              cs1 = -sn1
              sn1 = tn
           end if
           return
     end subroutine stdlib${ii}$_slaev2

     pure module subroutine stdlib${ii}$_dlaev2( a, b, c, rt1, rt2, cs1, sn1 )
     !! DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
     !! [  A   B  ]
     !! [  B   C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     !! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     !! eigenvector for RT1, giving the decomposition
     !! [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
     !! [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(dp), intent(in) :: a, b, c
           real(dp), intent(out) :: cs1, rt1, rt2, sn1
       ! =====================================================================
           
           
           
           
           ! Local Scalars 
           integer(${ik}$) :: sgn1, sgn2
           real(dp) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs( df )
           tb = b + b
           ab = abs( tb )
           if( abs( a )>abs( c ) ) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if( adf>ab ) then
              rt = adf*sqrt( one+( ab / adf )**2_${ik}$ )
           else if( adf<ab ) then
              rt = ab*sqrt( one+( adf / ab )**2_${ik}$ )
           else
              ! includes case ab=adf=0
              rt = ab*sqrt( two )
           end if
           if( sm<zero ) then
              rt1 = half*( sm-rt )
              sgn1 = -1_${ik}$
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else if( sm>zero ) then
              rt1 = half*( sm+rt )
              sgn1 = 1_${ik}$
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
              sgn1 = 1_${ik}$
           end if
           ! compute the eigenvector
           if( df>=zero ) then
              cs = df + rt
              sgn2 = 1_${ik}$
           else
              cs = df - rt
              sgn2 = -1_${ik}$
           end if
           acs = abs( cs )
           if( acs>ab ) then
              ct = -tb / cs
              sn1 = one / sqrt( one+ct*ct )
              cs1 = ct*sn1
           else
              if( ab==zero ) then
                 cs1 = one
                 sn1 = zero
              else
                 tn = -cs / tb
                 cs1 = one / sqrt( one+tn*tn )
                 sn1 = tn*cs1
              end if
           end if
           if( sgn1==sgn2 ) then
              tn = cs1
              cs1 = -sn1
              sn1 = tn
           end if
           return
     end subroutine stdlib${ii}$_dlaev2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laev2( a, b, c, rt1, rt2, cs1, sn1 )
     !! DLAEV2: computes the eigendecomposition of a 2-by-2 symmetric matrix
     !! [  A   B  ]
     !! [  B   C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     !! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     !! eigenvector for RT1, giving the decomposition
     !! [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
     !! [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(${rk}$), intent(in) :: a, b, c
           real(${rk}$), intent(out) :: cs1, rt1, rt2, sn1
       ! =====================================================================
           
           
           
           
           ! Local Scalars 
           integer(${ik}$) :: sgn1, sgn2
           real(${rk}$) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs( df )
           tb = b + b
           ab = abs( tb )
           if( abs( a )>abs( c ) ) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if( adf>ab ) then
              rt = adf*sqrt( one+( ab / adf )**2_${ik}$ )
           else if( adf<ab ) then
              rt = ab*sqrt( one+( adf / ab )**2_${ik}$ )
           else
              ! includes case ab=adf=0
              rt = ab*sqrt( two )
           end if
           if( sm<zero ) then
              rt1 = half*( sm-rt )
              sgn1 = -1_${ik}$
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else if( sm>zero ) then
              rt1 = half*( sm+rt )
              sgn1 = 1_${ik}$
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
              sgn1 = 1_${ik}$
           end if
           ! compute the eigenvector
           if( df>=zero ) then
              cs = df + rt
              sgn2 = 1_${ik}$
           else
              cs = df - rt
              sgn2 = -1_${ik}$
           end if
           acs = abs( cs )
           if( acs>ab ) then
              ct = -tb / cs
              sn1 = one / sqrt( one+ct*ct )
              cs1 = ct*sn1
           else
              if( ab==zero ) then
                 cs1 = one
                 sn1 = zero
              else
                 tn = -cs / tb
                 cs1 = one / sqrt( one+tn*tn )
                 sn1 = tn*cs1
              end if
           end if
           if( sgn1==sgn2 ) then
              tn = cs1
              cs1 = -sn1
              sn1 = tn
           end if
           return
     end subroutine stdlib${ii}$_${ri}$laev2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claev2( a, b, c, rt1, rt2, cs1, sn1 )
     !! CLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix
     !! [  A         B  ]
     !! [  CONJG(B)  C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     !! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     !! eigenvector for RT1, giving the decomposition
     !! [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ]
     !! [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ].
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(sp), intent(out) :: cs1, rt1, rt2
           complex(sp), intent(in) :: a, b, c
           complex(sp), intent(out) :: sn1
       ! =====================================================================
           
           
           ! Local Scalars 
           real(sp) :: t
           complex(sp) :: w
           ! Intrinsic Functions 
           ! Executable Statements 
           if( abs( b )==zero ) then
              w = one
           else
              w = conjg( b ) / abs( b )
           end if
           call stdlib${ii}$_slaev2( real( a,KIND=sp), abs( b ), real( c,KIND=sp), rt1, rt2, cs1, t )
                     
           sn1 = w*t
           return
     end subroutine stdlib${ii}$_claev2

     pure module subroutine stdlib${ii}$_zlaev2( a, b, c, rt1, rt2, cs1, sn1 )
     !! ZLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix
     !! [  A         B  ]
     !! [  CONJG(B)  C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     !! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     !! eigenvector for RT1, giving the decomposition
     !! [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ]
     !! [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ].
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(dp), intent(out) :: cs1, rt1, rt2
           complex(dp), intent(in) :: a, b, c
           complex(dp), intent(out) :: sn1
       ! =====================================================================
           
           
           ! Local Scalars 
           real(dp) :: t
           complex(dp) :: w
           ! Intrinsic Functions 
           ! Executable Statements 
           if( abs( b )==zero ) then
              w = one
           else
              w = conjg( b ) / abs( b )
           end if
           call stdlib${ii}$_dlaev2( real( a,KIND=dp), abs( b ), real( c,KIND=dp), rt1, rt2, cs1, t )
                     
           sn1 = w*t
           return
     end subroutine stdlib${ii}$_zlaev2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laev2( a, b, c, rt1, rt2, cs1, sn1 )
     !! ZLAEV2: computes the eigendecomposition of a 2-by-2 Hermitian matrix
     !! [  A         B  ]
     !! [  CONJG(B)  C  ].
     !! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     !! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     !! eigenvector for RT1, giving the decomposition
     !! [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ]
     !! [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ].
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(${ck}$), intent(out) :: cs1, rt1, rt2
           complex(${ck}$), intent(in) :: a, b, c
           complex(${ck}$), intent(out) :: sn1
       ! =====================================================================
           
           
           ! Local Scalars 
           real(${ck}$) :: t
           complex(${ck}$) :: w
           ! Intrinsic Functions 
           ! Executable Statements 
           if( abs( b )==zero ) then
              w = one
           else
              w = conjg( b ) / abs( b )
           end if
           call stdlib${ii}$_${c2ri(ci)}$laev2( real( a,KIND=${ck}$), abs( b ), real( c,KIND=${ck}$), rt1, rt2, cs1, t )
                     
           sn1 = w*t
           return
     end subroutine stdlib${ii}$_${ci}$laev2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slagtf( n, a, lambda, b, c, tol, d, in, info )
     !! SLAGTF factorizes the matrix (T - lambda*I), where T is an n by n
     !! tridiagonal matrix and lambda is a scalar, as
     !! T - lambda*I = PLU,
     !! where P is a permutation matrix, L is a unit lower tridiagonal matrix
     !! with at most one non-zero sub-diagonal elements per column and U is
     !! an upper triangular matrix with at most two non-zero super-diagonal
     !! elements per column.
     !! The factorization is obtained by Gaussian elimination with partial
     !! pivoting and implicit row scaling.
     !! The parameter LAMBDA is included in the routine so that SLAGTF may
     !! be used, in conjunction with SLAGTS, to obtain eigenvectors of T by
     !! inverse iteration.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: n
           real(sp), intent(in) :: lambda, tol
           ! Array Arguments 
           integer(${ik}$), intent(out) :: in(*)
           real(sp), intent(inout) :: a(*), b(*), c(*)
           real(sp), intent(out) :: d(*)
       ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: k
           real(sp) :: eps, mult, piv1, piv2, scale1, scale2, temp, tl
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
              call stdlib${ii}$_xerbla( 'SLAGTF', -info )
              return
           end if
           if( n==0 )return
           a( 1_${ik}$ ) = a( 1_${ik}$ ) - lambda
           in( n ) = 0_${ik}$
           if( n==1_${ik}$ ) then
              if( a( 1_${ik}$ )==zero )in( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           tl = max( tol, eps )
           scale1 = abs( a( 1_${ik}$ ) ) + abs( b( 1_${ik}$ ) )
           loop_10: do k = 1, n - 1
              a( k+1 ) = a( k+1 ) - lambda
              scale2 = abs( c( k ) ) + abs( a( k+1 ) )
              if( k<( n-1 ) )scale2 = scale2 + abs( b( k+1 ) )
              if( a( k )==zero ) then
                 piv1 = zero
              else
                 piv1 = abs( a( k ) ) / scale1
              end if
              if( c( k )==zero ) then
                 in( k ) = 0_${ik}$
                 piv2 = zero
                 scale1 = scale2
                 if( k<( n-1 ) )d( k ) = zero
              else
                 piv2 = abs( c( k ) ) / scale2
                 if( piv2<=piv1 ) then
                    in( k ) = 0_${ik}$
                    scale1 = scale2
                    c( k ) = c( k ) / a( k )
                    a( k+1 ) = a( k+1 ) - c( k )*b( k )
                    if( k<( n-1 ) )d( k ) = zero
                 else
                    in( k ) = 1_${ik}$
                    mult = a( k ) / c( k )
                    a( k ) = c( k )
                    temp = a( k+1 )
                    a( k+1 ) = b( k ) - mult*temp
                    if( k<( n-1 ) ) then
                       d( k ) = b( k+1 )
                       b( k+1 ) = -mult*d( k )
                    end if
                    b( k ) = temp
                    c( k ) = mult
                 end if
              end if
              if( ( max( piv1, piv2 )<=tl ) .and. ( in( n )==0_${ik}$ ) )in( n ) = k
           end do loop_10
           if( ( abs( a( n ) )<=scale1*tl ) .and. ( in( n )==0_${ik}$ ) )in( n ) = n
           return
     end subroutine stdlib${ii}$_slagtf

     pure module subroutine stdlib${ii}$_dlagtf( n, a, lambda, b, c, tol, d, in, info )
     !! DLAGTF factorizes the matrix (T - lambda*I), where T is an n by n
     !! tridiagonal matrix and lambda is a scalar, as
     !! T - lambda*I = PLU,
     !! where P is a permutation matrix, L is a unit lower tridiagonal matrix
     !! with at most one non-zero sub-diagonal elements per column and U is
     !! an upper triangular matrix with at most two non-zero super-diagonal
     !! elements per column.
     !! The factorization is obtained by Gaussian elimination with partial
     !! pivoting and implicit row scaling.
     !! The parameter LAMBDA is included in the routine so that DLAGTF may
     !! be used, in conjunction with DLAGTS, to obtain eigenvectors of T by
     !! inverse iteration.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: n
           real(dp), intent(in) :: lambda, tol
           ! Array Arguments 
           integer(${ik}$), intent(out) :: in(*)
           real(dp), intent(inout) :: a(*), b(*), c(*)
           real(dp), intent(out) :: d(*)
       ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: k
           real(dp) :: eps, mult, piv1, piv2, scale1, scale2, temp, tl
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
              call stdlib${ii}$_xerbla( 'DLAGTF', -info )
              return
           end if
           if( n==0 )return
           a( 1_${ik}$ ) = a( 1_${ik}$ ) - lambda
           in( n ) = 0_${ik}$
           if( n==1_${ik}$ ) then
              if( a( 1_${ik}$ )==zero )in( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           eps = stdlib${ii}$_dlamch( 'EPSILON' )
           tl = max( tol, eps )
           scale1 = abs( a( 1_${ik}$ ) ) + abs( b( 1_${ik}$ ) )
           loop_10: do k = 1, n - 1
              a( k+1 ) = a( k+1 ) - lambda
              scale2 = abs( c( k ) ) + abs( a( k+1 ) )
              if( k<( n-1 ) )scale2 = scale2 + abs( b( k+1 ) )
              if( a( k )==zero ) then
                 piv1 = zero
              else
                 piv1 = abs( a( k ) ) / scale1
              end if
              if( c( k )==zero ) then
                 in( k ) = 0_${ik}$
                 piv2 = zero
                 scale1 = scale2
                 if( k<( n-1 ) )d( k ) = zero
              else
                 piv2 = abs( c( k ) ) / scale2
                 if( piv2<=piv1 ) then
                    in( k ) = 0_${ik}$
                    scale1 = scale2
                    c( k ) = c( k ) / a( k )
                    a( k+1 ) = a( k+1 ) - c( k )*b( k )
                    if( k<( n-1 ) )d( k ) = zero
                 else
                    in( k ) = 1_${ik}$
                    mult = a( k ) / c( k )
                    a( k ) = c( k )
                    temp = a( k+1 )
                    a( k+1 ) = b( k ) - mult*temp
                    if( k<( n-1 ) ) then
                       d( k ) = b( k+1 )
                       b( k+1 ) = -mult*d( k )
                    end if
                    b( k ) = temp
                    c( k ) = mult
                 end if
              end if
              if( ( max( piv1, piv2 )<=tl ) .and. ( in( n )==0_${ik}$ ) )in( n ) = k
           end do loop_10
           if( ( abs( a( n ) )<=scale1*tl ) .and. ( in( n )==0_${ik}$ ) )in( n ) = n
           return
     end subroutine stdlib${ii}$_dlagtf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lagtf( n, a, lambda, b, c, tol, d, in, info )
     !! DLAGTF: factorizes the matrix (T - lambda*I), where T is an n by n
     !! tridiagonal matrix and lambda is a scalar, as
     !! T - lambda*I = PLU,
     !! where P is a permutation matrix, L is a unit lower tridiagonal matrix
     !! with at most one non-zero sub-diagonal elements per column and U is
     !! an upper triangular matrix with at most two non-zero super-diagonal
     !! elements per column.
     !! The factorization is obtained by Gaussian elimination with partial
     !! pivoting and implicit row scaling.
     !! The parameter LAMBDA is included in the routine so that DLAGTF may
     !! be used, in conjunction with DLAGTS, to obtain eigenvectors of T by
     !! inverse iteration.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: n
           real(${rk}$), intent(in) :: lambda, tol
           ! Array Arguments 
           integer(${ik}$), intent(out) :: in(*)
           real(${rk}$), intent(inout) :: a(*), b(*), c(*)
           real(${rk}$), intent(out) :: d(*)
       ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: k
           real(${rk}$) :: eps, mult, piv1, piv2, scale1, scale2, temp, tl
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
              call stdlib${ii}$_xerbla( 'DLAGTF', -info )
              return
           end if
           if( n==0 )return
           a( 1_${ik}$ ) = a( 1_${ik}$ ) - lambda
           in( n ) = 0_${ik}$
           if( n==1_${ik}$ ) then
              if( a( 1_${ik}$ )==zero )in( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' )
           tl = max( tol, eps )
           scale1 = abs( a( 1_${ik}$ ) ) + abs( b( 1_${ik}$ ) )
           loop_10: do k = 1, n - 1
              a( k+1 ) = a( k+1 ) - lambda
              scale2 = abs( c( k ) ) + abs( a( k+1 ) )
              if( k<( n-1 ) )scale2 = scale2 + abs( b( k+1 ) )
              if( a( k )==zero ) then
                 piv1 = zero
              else
                 piv1 = abs( a( k ) ) / scale1
              end if
              if( c( k )==zero ) then
                 in( k ) = 0_${ik}$
                 piv2 = zero
                 scale1 = scale2
                 if( k<( n-1 ) )d( k ) = zero
              else
                 piv2 = abs( c( k ) ) / scale2
                 if( piv2<=piv1 ) then
                    in( k ) = 0_${ik}$
                    scale1 = scale2
                    c( k ) = c( k ) / a( k )
                    a( k+1 ) = a( k+1 ) - c( k )*b( k )
                    if( k<( n-1 ) )d( k ) = zero
                 else
                    in( k ) = 1_${ik}$
                    mult = a( k ) / c( k )
                    a( k ) = c( k )
                    temp = a( k+1 )
                    a( k+1 ) = b( k ) - mult*temp
                    if( k<( n-1 ) ) then
                       d( k ) = b( k+1 )
                       b( k+1 ) = -mult*d( k )
                    end if
                    b( k ) = temp
                    c( k ) = mult
                 end if
              end if
              if( ( max( piv1, piv2 )<=tl ) .and. ( in( n )==0_${ik}$ ) )in( n ) = k
           end do loop_10
           if( ( abs( a( n ) )<=scale1*tl ) .and. ( in( n )==0_${ik}$ ) )in( n ) = n
           return
     end subroutine stdlib${ii}$_${ri}$lagtf

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slagts( job, n, a, b, c, d, in, y, tol, info )
     !! SLAGTS may be used to solve one of the systems of equations
     !! (T - lambda*I)*x = y   or   (T - lambda*I)**T*x = y,
     !! where T is an n by n tridiagonal matrix, for x, following the
     !! factorization of (T - lambda*I) as
     !! (T - lambda*I) = P*L*U ,
     !! by routine SLAGTF. The choice of equation to be solved is
     !! controlled by the argument JOB, and in each case there is an option
     !! to perturb zero or very small diagonal elements of U, this option
     !! being intended for use in applications such as inverse iteration.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: job, n
           real(sp), intent(inout) :: tol
           ! Array Arguments 
           integer(${ik}$), intent(in) :: in(*)
           real(sp), intent(in) :: a(*), b(*), c(*), d(*)
           real(sp), intent(inout) :: y(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: k
           real(sp) :: absak, ak, bignum, eps, pert, sfmin, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           if( ( abs( job )>2_${ik}$ ) .or. ( job==0_${ik}$ ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAGTS', -info )
              return
           end if
           if( n==0 )return
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           sfmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           bignum = one / sfmin
           if( job<0_${ik}$ ) then
              if( tol<=zero ) then
                 tol = abs( a( 1_${ik}$ ) )
                 if( n>1_${ik}$ )tol = max( tol, abs( a( 2_${ik}$ ) ), abs( b( 1_${ik}$ ) ) )
                 do k = 3, n
                    tol = max( tol, abs( a( k ) ), abs( b( k-1 ) ),abs( d( k-2 ) ) )
                 end do
                 tol = tol*eps
                 if( tol==zero )tol = eps
              end if
           end if
           if( abs( job )==1_${ik}$ ) then
              do k = 2, n
                 if( in( k-1 )==0_${ik}$ ) then
                    y( k ) = y( k ) - c( k-1 )*y( k-1 )
                 else
                    temp = y( k-1 )
                    y( k-1 ) = y( k )
                    y( k ) = temp - c( k-1 )*y( k )
                 end if
              end do
              if( job==1_${ik}$ ) then
                 loop_30: do k = n, 1, -1
                    if( k<=n-2 ) then
                       temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 )
                    else if( k==n-1 ) then
                       temp = y( k ) - b( k )*y( k+1 )
                    else
                       temp = y( k )
                    end if
                    ak = a( k )
                    absak = abs( ak )
                    if( absak<one ) then
                       if( absak<sfmin ) then
                          if( absak==zero .or. abs( temp )*sfmin>absak )then
                             info = k
                             return
                          else
                             temp = temp*bignum
                             ak = ak*bignum
                          end if
                       else if( abs( temp )>absak*bignum ) then
                          info = k
                          return
                       end if
                    end if
                    y( k ) = temp / ak
                 end do loop_30
              else
                 loop_50: do k = n, 1, -1
                    if( k<=n-2 ) then
                       temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 )
                    else if( k==n-1 ) then
                       temp = y( k ) - b( k )*y( k+1 )
                    else
                       temp = y( k )
                    end if
                    ak = a( k )
                    pert = sign( tol, ak )
                    40 continue
                    absak = abs( ak )
                    if( absak<one ) then
                       if( absak<sfmin ) then
                          if( absak==zero .or. abs( temp )*sfmin>absak )then
                             ak = ak + pert
                             pert = 2_${ik}$*pert
                             go to 40
                          else
                             temp = temp*bignum
                             ak = ak*bignum
                          end if
                       else if( abs( temp )>absak*bignum ) then
                          ak = ak + pert
                          pert = 2_${ik}$*pert
                          go to 40
                       end if
                    end if
                    y( k ) = temp / ak
                 end do loop_50
              end if
           else
              ! come to here if  job = 2 or -2
              if( job==2_${ik}$ ) then
                 loop_60: do k = 1, n
                    if( k>=3_${ik}$ ) then
                       temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 )
                    else if( k==2_${ik}$ ) then
                       temp = y( k ) - b( k-1 )*y( k-1 )
                    else
                       temp = y( k )
                    end if
                    ak = a( k )
                    absak = abs( ak )
                    if( absak<one ) then
                       if( absak<sfmin ) then
                          if( absak==zero .or. abs( temp )*sfmin>absak )then
                             info = k
                             return
                          else
                             temp = temp*bignum
                             ak = ak*bignum
                          end if
                       else if( abs( temp )>absak*bignum ) then
                          info = k
                          return
                       end if
                    end if
                    y( k ) = temp / ak
                 end do loop_60
              else
                 loop_80: do k = 1, n
                    if( k>=3_${ik}$ ) then
                       temp = y( k ) - b( k-1 )*y( k-1 ) - d( k-2 )*y( k-2 )
                    else if( k==2_${ik}$ ) then
                       temp = y( k ) - b( k-1 )*y( k-1 )
                    else
                       temp = y( k )
                    end if
                    ak = a( k )
                    pert = sign( tol, ak )
                    70 continue
                    absak = abs( ak )
                    if( absak<one ) then
                       if( absak<sfmin ) then
                          if( absak==zero .or. abs( temp )*sfmin>absak )then
                             ak = ak + pert
                             pert = 2_${ik}$*pert
                             go to 70
                          else
                             temp = temp*bignum
                             ak = ak*bignum
                          end if
                       else if( abs( temp )>absak*bignum ) then
                          ak = ak + pert
                          pert = 2_${ik}$*pert
                          go to 70
                       end if
                    end if
                    y( k ) = temp / ak
                 end do loop_80
              end if
              do k = n, 2, -1
                 if( in( k-1 )==0_${ik}$ ) then
                    y( k-1 ) = y( k-1 ) - c( k-1 )*y( k )
                 else
                    temp = y( k-1 )
                    y( k-1 ) = y( k )
                    y( k ) = temp - c( k-1 )*y( k )
                 end if
              end do
           end if
     end subroutine stdlib${ii}$_slagts

     pure module subroutine stdlib${ii}$_dlagts( job, n, a, b, c, d, in, y, tol, info )
     !! DLAGTS may be used to solve one of the systems of equations
     !! (T - lambda*I)*x = y   or   (T - lambda*I)**T*x = y,
     !! where T is an n by n tridiagonal matrix, for x, following the
     !! factorization of (T - lambda*I) as
     !! (T - lambda*I) = P*L*U ,
     !! by routine DLAGTF. The choice of equation to be solved is
     !! controlled by the argument JOB, and in each case there is an option
     !! to perturb zero or very small diagonal elements of U, this option
     !! being intended for use in applications such as inverse iteration.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: job, n
           real(dp), intent(inout) :: tol
           ! Array Arguments 
           integer(${ik}$), intent(in) :: in(*)
           real(dp), intent(in) :: a(*), b(*), c(*), d(*)
           real(dp), intent(inout) :: y(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: k
           real(dp) :: absak, ak, bignum, eps, pert, sfmin, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           if( ( abs( job )>2_${ik}$ ) .or. ( job==0_${ik}$ ) ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAGTS', -info )
              return
           end if
           if( n==0 )return
           eps = stdlib${ii}$_dlamch( 'EPSILON' )
           sfmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           bignum = one / sfmin
           if( job<0_${ik}$ ) then
              if( tol<=zero ) then
                 tol = abs( a( 1_${ik}$ ) )
                 if( n>1_${ik}$ )tol = max( tol, abs( a( 2_${ik}$ ) ), abs( b( 1_${ik}$ ) ) )
                 do k = 3, n
                    tol = max( tol, abs( a( k ) ), abs( b( k-1 ) ),abs( d( k-2 ) ) )
                 end do
                 tol = tol*eps
                 if( tol==zero )tol = eps
              end if
           end if
           if( abs( job )==1_${ik}$ ) then
              do k = 2, n
                 if( in( k-1 )==0_${ik}$ ) then
                    y( k ) = y( k ) - c( k-1 )*y( k-1 )
                 else
                    temp = y( k-1 )
                    y( k-1 ) = y( k )
                    y( k ) = temp - c( k-1 )*y( k )
                 end if
              end do
              if( job==1_${ik}$ ) then
                 loop_30: do k = n, 1, -1
                    if( k<=n-2 ) then
                       temp = y( k ) - b( k )*y( k+1 ) - d( k )*y( k+2 )
                    else if( k==n-1 ) then