stdlib_lapack_svd_comp.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_svd_comp
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_sgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info )
     !! SGEBRD reduces a general real M-by-N matrix A to upper or lower
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
           lwkopt = ( m+n )*nb
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEBRD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           minmn = min( m, n )
           if( minmn==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ws = max( m, n )
           ldwrkx = m
           ldwrky = n
           if( nb>1_${ik}$ .and. nb<minmn ) then
              ! set the crossover point nx.
              nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              ! determine when to switch from blocked to unblocked code.
              if( nx<minmn ) then
                 ws = ( m+n )*nb
                 if( lwork<ws ) then
                    ! not enough work space for the optimal nb, consider using
                    ! a smaller block size.
                    nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'SGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    if( lwork>=( m+n )*nbmin ) then
                       nb = lwork / ( m+n )
                    else
                       nb = 1_${ik}$
                       nx = minmn
                    end if
                 end if
              end if
           else
              nx = minmn
           end if
           do i = 1, minmn - nx, nb
              ! reduce rows and columns i:i+nb-1 to bidiagonal form and return
              ! the matrices x and y which are needed to update the unreduced
              ! part of the matrix
              call stdlib${ii}$_slabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), &
                        taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky )
              ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update
              ! of the form  a := a - v*y**t - x*u**t
              call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+&
                        nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda )
              call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, &
                        work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda )
              ! copy diagonal and off-diagonal elements of b back into a
              if( m>=n ) then
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j, j+1 ) = e( j )
                 end do
              else
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j+1, j ) = e( j )
                 end do
              end if
           end do
           ! use unblocked code to reduce the remainder of the matrix
           call stdlib${ii}$_sgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), &
                     work, iinfo )
           work( 1_${ik}$ ) = ws
           return
     end subroutine stdlib${ii}$_sgebrd

     pure module subroutine stdlib${ii}$_dgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info )
     !! DGEBRD reduces a general real M-by-N matrix A to upper or lower
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
           lwkopt = ( m+n )*nb
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEBRD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           minmn = min( m, n )
           if( minmn==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ws = max( m, n )
           ldwrkx = m
           ldwrky = n
           if( nb>1_${ik}$ .and. nb<minmn ) then
              ! set the crossover point nx.
              nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              ! determine when to switch from blocked to unblocked code.
              if( nx<minmn ) then
                 ws = ( m+n )*nb
                 if( lwork<ws ) then
                    ! not enough work space for the optimal nb, consider using
                    ! a smaller block size.
                    nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    if( lwork>=( m+n )*nbmin ) then
                       nb = lwork / ( m+n )
                    else
                       nb = 1_${ik}$
                       nx = minmn
                    end if
                 end if
              end if
           else
              nx = minmn
           end if
           do i = 1, minmn - nx, nb
              ! reduce rows and columns i:i+nb-1 to bidiagonal form and return
              ! the matrices x and y which are needed to update the unreduced
              ! part of the matrix
              call stdlib${ii}$_dlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), &
                        taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky )
              ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update
              ! of the form  a := a - v*y**t - x*u**t
              call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+&
                        nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda )
              call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, &
                        work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda )
              ! copy diagonal and off-diagonal elements of b back into a
              if( m>=n ) then
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j, j+1 ) = e( j )
                 end do
              else
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j+1, j ) = e( j )
                 end do
              end if
           end do
           ! use unblocked code to reduce the remainder of the matrix
           call stdlib${ii}$_dgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), &
                     work, iinfo )
           work( 1_${ik}$ ) = ws
           return
     end subroutine stdlib${ii}$_dgebrd

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info )
     !! DGEBRD: reduces a general real M-by-N matrix A to upper or lower
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
           lwkopt = ( m+n )*nb
           work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEBRD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           minmn = min( m, n )
           if( minmn==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ws = max( m, n )
           ldwrkx = m
           ldwrky = n
           if( nb>1_${ik}$ .and. nb<minmn ) then
              ! set the crossover point nx.
              nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              ! determine when to switch from blocked to unblocked code.
              if( nx<minmn ) then
                 ws = ( m+n )*nb
                 if( lwork<ws ) then
                    ! not enough work space for the optimal nb, consider using
                    ! a smaller block size.
                    nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    if( lwork>=( m+n )*nbmin ) then
                       nb = lwork / ( m+n )
                    else
                       nb = 1_${ik}$
                       nx = minmn
                    end if
                 end if
              end if
           else
              nx = minmn
           end if
           do i = 1, minmn - nx, nb
              ! reduce rows and columns i:i+nb-1 to bidiagonal form and return
              ! the matrices x and y which are needed to update the unreduced
              ! part of the matrix
              call stdlib${ii}$_${ri}$labrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), &
                        taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky )
              ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update
              ! of the form  a := a - v*y**t - x*u**t
              call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+&
                        nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda )
              call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, &
                        work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda )
              ! copy diagonal and off-diagonal elements of b back into a
              if( m>=n ) then
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j, j+1 ) = e( j )
                 end do
              else
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j+1, j ) = e( j )
                 end do
              end if
           end do
           ! use unblocked code to reduce the remainder of the matrix
           call stdlib${ii}$_${ri}$gebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), &
                     work, iinfo )
           work( 1_${ik}$ ) = ws
           return
     end subroutine stdlib${ii}$_${ri}$gebrd

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info )
     !! CGEBRD reduces a general complex M-by-N matrix A to upper or lower
     !! bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(out) :: d(*), e(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
           lwkopt = ( m+n )*nb
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEBRD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           minmn = min( m, n )
           if( minmn==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ws = max( m, n )
           ldwrkx = m
           ldwrky = n
           if( nb>1_${ik}$ .and. nb<minmn ) then
              ! set the crossover point nx.
              nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              ! determine when to switch from blocked to unblocked code.
              if( nx<minmn ) then
                 ws = ( m+n )*nb
                 if( lwork<ws ) then
                    ! not enough work space for the optimal nb, consider using
                    ! a smaller block size.
                    nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'CGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    if( lwork>=( m+n )*nbmin ) then
                       nb = lwork / ( m+n )
                    else
                       nb = 1_${ik}$
                       nx = minmn
                    end if
                 end if
              end if
           else
              nx = minmn
           end if
           do i = 1, minmn - nx, nb
              ! reduce rows and columns i:i+ib-1 to bidiagonal form and return
              ! the matrices x and y which are needed to update the unreduced
              ! part of the matrix
              call stdlib${ii}$_clabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), &
                        taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky )
              ! update the trailing submatrix a(i+ib:m,i+ib:n), using
              ! an update of the form  a := a - v*y**h - x*u**h
              call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -&
              cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda )
                        
              call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, &
                        work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda )
              ! copy diagonal and off-diagonal elements of b back into a
              if( m>=n ) then
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j, j+1 ) = e( j )
                 end do
              else
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j+1, j ) = e( j )
                 end do
              end if
           end do
           ! use unblocked code to reduce the remainder of the matrix
           call stdlib${ii}$_cgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), &
                     work, iinfo )
           work( 1_${ik}$ ) = ws
           return
     end subroutine stdlib${ii}$_cgebrd

     pure module subroutine stdlib${ii}$_zgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info )
     !! ZGEBRD reduces a general complex M-by-N matrix A to upper or lower
     !! bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(out) :: d(*), e(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
           lwkopt = ( m+n )*nb
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEBRD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           minmn = min( m, n )
           if( minmn==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ws = max( m, n )
           ldwrkx = m
           ldwrky = n
           if( nb>1_${ik}$ .and. nb<minmn ) then
              ! set the crossover point nx.
              nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              ! determine when to switch from blocked to unblocked code.
              if( nx<minmn ) then
                 ws = ( m+n )*nb
                 if( lwork<ws ) then
                    ! not enough work space for the optimal nb, consider using
                    ! a smaller block size.
                    nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    if( lwork>=( m+n )*nbmin ) then
                       nb = lwork / ( m+n )
                    else
                       nb = 1_${ik}$
                       nx = minmn
                    end if
                 end if
              end if
           else
              nx = minmn
           end if
           do i = 1, minmn - nx, nb
              ! reduce rows and columns i:i+ib-1 to bidiagonal form and return
              ! the matrices x and y which are needed to update the unreduced
              ! part of the matrix
              call stdlib${ii}$_zlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), &
                        taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky )
              ! update the trailing submatrix a(i+ib:m,i+ib:n), using
              ! an update of the form  a := a - v*y**h - x*u**h
              call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -&
              cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda )
                        
              call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, &
                        work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda )
              ! copy diagonal and off-diagonal elements of b back into a
              if( m>=n ) then
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j, j+1 ) = e( j )
                 end do
              else
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j+1, j ) = e( j )
                 end do
              end if
           end do
           ! use unblocked code to reduce the remainder of the matrix
           call stdlib${ii}$_zgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), &
                     work, iinfo )
           work( 1_${ik}$ ) = ws
           return
     end subroutine stdlib${ii}$_zgebrd

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info )
     !! ZGEBRD: reduces a general complex M-by-N matrix A to upper or lower
     !! bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${ck}$), intent(out) :: d(*), e(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
           lwkopt = ( m+n )*nb
           work( 1_${ik}$ ) = real( lwkopt,KIND=${ck}$)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEBRD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           minmn = min( m, n )
           if( minmn==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           ws = max( m, n )
           ldwrkx = m
           ldwrky = n
           if( nb>1_${ik}$ .and. nb<minmn ) then
              ! set the crossover point nx.
              nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              ! determine when to switch from blocked to unblocked code.
              if( nx<minmn ) then
                 ws = ( m+n )*nb
                 if( lwork<ws ) then
                    ! not enough work space for the optimal nb, consider using
                    ! a smaller block size.
                    nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    if( lwork>=( m+n )*nbmin ) then
                       nb = lwork / ( m+n )
                    else
                       nb = 1_${ik}$
                       nx = minmn
                    end if
                 end if
              end if
           else
              nx = minmn
           end if
           do i = 1, minmn - nx, nb
              ! reduce rows and columns i:i+ib-1 to bidiagonal form and return
              ! the matrices x and y which are needed to update the unreduced
              ! part of the matrix
              call stdlib${ii}$_${ci}$labrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), &
                        taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky )
              ! update the trailing submatrix a(i+ib:m,i+ib:n), using
              ! an update of the form  a := a - v*y**h - x*u**h
              call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -&
              cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda )
                        
              call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, &
                        work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda )
              ! copy diagonal and off-diagonal elements of b back into a
              if( m>=n ) then
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j, j+1 ) = e( j )
                 end do
              else
                 do j = i, i + nb - 1
                    a( j, j ) = d( j )
                    a( j+1, j ) = e( j )
                 end do
              end if
           end do
           ! use unblocked code to reduce the remainder of the matrix
           call stdlib${ii}$_${ci}$gebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), &
                     work, iinfo )
           work( 1_${ik}$ ) = ws
           return
     end subroutine stdlib${ii}$_${ci}$gebrd

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgebd2( m, n, a, lda, d, e, tauq, taup, work, info )
     !! SGEBD2 reduces a real general m by n matrix A to upper or lower
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEBD2', -info )
              return
           end if
           if( m>=n ) then
              ! reduce to upper bidiagonal form
              do i = 1, n
                 ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
                 call stdlib${ii}$_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) )
                           
                 d( i ) = a( i, i )
                 a( i, i ) = one
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 if( i<n )call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tauq( i ),a( i, i+&
                           1_${ik}$ ), lda, work )
                 a( i, i ) = d( i )
                 if( i<n ) then
                    ! generate elementary reflector g(i) to annihilate
                    ! a(i,i+2:n)
                    call stdlib${ii}$_slarfg( n-i, a( i, i+1 ), a( i, min( i+2, n ) ),lda, taup( i ) )
                              
                    e( i ) = a( i, i+1 )
                    a( i, i+1 ) = one
                    ! apply g(i) to a(i+1:m,i+1:n) from the right
                    call stdlib${ii}$_slarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 &
                              ), lda, work )
                    a( i, i+1 ) = e( i )
                 else
                    taup( i ) = zero
                 end if
              end do
           else
              ! reduce to lower bidiagonal form
              do i = 1, m
                 ! generate elementary reflector g(i) to annihilate a(i,i+1:n)
                 call stdlib${ii}$_slarfg( n-i+1, a( i, i ), a( i, min( i+1, n ) ), lda,taup( i ) )
                           
                 d( i ) = a( i, i )
                 a( i, i ) = one
                 ! apply g(i) to a(i+1:m,i:n) from the right
                 if( i<m )call stdlib${ii}$_slarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+&
                           1_${ik}$, i ), lda, work )
                 a( i, i ) = d( i )
                 if( i<m ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:m,i)
                    call stdlib${ii}$_slarfg( m-i, a( i+1, i ), a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) )
                              
                    e( i ) = a( i+1, i )
                    a( i+1, i ) = one
                    ! apply h(i) to a(i+1:m,i+1:n) from the left
                    call stdlib${ii}$_slarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$, tauq( i ),a( i+1, i+1 ), &
                              lda, work )
                    a( i+1, i ) = e( i )
                 else
                    tauq( i ) = zero
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_sgebd2

     pure module subroutine stdlib${ii}$_dgebd2( m, n, a, lda, d, e, tauq, taup, work, info )
     !! DGEBD2 reduces a real general m by n matrix A to upper or lower
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEBD2', -info )
              return
           end if
           if( m>=n ) then
              ! reduce to upper bidiagonal form
              do i = 1, n
                 ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
                 call stdlib${ii}$_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) )
                           
                 d( i ) = a( i, i )
                 a( i, i ) = one
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 if( i<n )call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tauq( i ),a( i, i+&
                           1_${ik}$ ), lda, work )
                 a( i, i ) = d( i )
                 if( i<n ) then
                    ! generate elementary reflector g(i) to annihilate
                    ! a(i,i+2:n)
                    call stdlib${ii}$_dlarfg( n-i, a( i, i+1 ), a( i, min( i+2, n ) ),lda, taup( i ) )
                              
                    e( i ) = a( i, i+1 )
                    a( i, i+1 ) = one
                    ! apply g(i) to a(i+1:m,i+1:n) from the right
                    call stdlib${ii}$_dlarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 &
                              ), lda, work )
                    a( i, i+1 ) = e( i )
                 else
                    taup( i ) = zero
                 end if
              end do
           else
              ! reduce to lower bidiagonal form
              do i = 1, m
                 ! generate elementary reflector g(i) to annihilate a(i,i+1:n)
                 call stdlib${ii}$_dlarfg( n-i+1, a( i, i ), a( i, min( i+1, n ) ), lda,taup( i ) )
                           
                 d( i ) = a( i, i )
                 a( i, i ) = one
                 ! apply g(i) to a(i+1:m,i:n) from the right
                 if( i<m )call stdlib${ii}$_dlarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+&
                           1_${ik}$, i ), lda, work )
                 a( i, i ) = d( i )
                 if( i<m ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:m,i)
                    call stdlib${ii}$_dlarfg( m-i, a( i+1, i ), a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) )
                              
                    e( i ) = a( i+1, i )
                    a( i+1, i ) = one
                    ! apply h(i) to a(i+1:m,i+1:n) from the left
                    call stdlib${ii}$_dlarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$, tauq( i ),a( i+1, i+1 ), &
                              lda, work )
                    a( i+1, i ) = e( i )
                 else
                    tauq( i ) = zero
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_dgebd2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gebd2( m, n, a, lda, d, e, tauq, taup, work, info )
     !! DGEBD2: reduces a real general m by n matrix A to upper or lower
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEBD2', -info )
              return
           end if
           if( m>=n ) then
              ! reduce to upper bidiagonal form
              do i = 1, n
                 ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
                 call stdlib${ii}$_${ri}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) )
                           
                 d( i ) = a( i, i )
                 a( i, i ) = one
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 if( i<n )call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tauq( i ),a( i, i+&
                           1_${ik}$ ), lda, work )
                 a( i, i ) = d( i )
                 if( i<n ) then
                    ! generate elementary reflector g(i) to annihilate
                    ! a(i,i+2:n)
                    call stdlib${ii}$_${ri}$larfg( n-i, a( i, i+1 ), a( i, min( i+2, n ) ),lda, taup( i ) )
                              
                    e( i ) = a( i, i+1 )
                    a( i, i+1 ) = one
                    ! apply g(i) to a(i+1:m,i+1:n) from the right
                    call stdlib${ii}$_${ri}$larf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 &
                              ), lda, work )
                    a( i, i+1 ) = e( i )
                 else
                    taup( i ) = zero
                 end if
              end do
           else
              ! reduce to lower bidiagonal form
              do i = 1, m
                 ! generate elementary reflector g(i) to annihilate a(i,i+1:n)
                 call stdlib${ii}$_${ri}$larfg( n-i+1, a( i, i ), a( i, min( i+1, n ) ), lda,taup( i ) )
                           
                 d( i ) = a( i, i )
                 a( i, i ) = one
                 ! apply g(i) to a(i+1:m,i:n) from the right
                 if( i<m )call stdlib${ii}$_${ri}$larf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+&
                           1_${ik}$, i ), lda, work )
                 a( i, i ) = d( i )
                 if( i<m ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:m,i)
                    call stdlib${ii}$_${ri}$larfg( m-i, a( i+1, i ), a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) )
                              
                    e( i ) = a( i+1, i )
                    a( i+1, i ) = one
                    ! apply h(i) to a(i+1:m,i+1:n) from the left
                    call stdlib${ii}$_${ri}$larf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$, tauq( i ),a( i+1, i+1 ), &
                              lda, work )
                    a( i+1, i ) = e( i )
                 else
                    tauq( i ) = zero
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$gebd2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgebd2( m, n, a, lda, d, e, tauq, taup, work, info )
     !! CGEBD2 reduces a complex general m by n matrix A to upper or lower
     !! real bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(out) :: d(*), e(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEBD2', -info )
              return
           end if
           if( m>=n ) then
              ! reduce to upper bidiagonal form
              do i = 1, n
                 ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
                 alpha = a( i, i )
                 call stdlib${ii}$_clarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) )
                 d( i ) = real( alpha,KIND=sp)
                 a( i, i ) = cone
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 if( i<n )call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tauq( i ) ), &
                           a( i, i+1 ), lda, work )
                 a( i, i ) = d( i )
                 if( i<n ) then
                    ! generate elementary reflector g(i) to annihilate
                    ! a(i,i+2:n)
                    call stdlib${ii}$_clacgv( n-i, a( i, i+1 ), lda )
                    alpha = a( i, i+1 )
                    call stdlib${ii}$_clarfg( n-i, alpha, a( i, min( i+2, n ) ),lda, taup( i ) )
                    e( i ) = real( alpha,KIND=sp)
                    a( i, i+1 ) = cone
                    ! apply g(i) to a(i+1:m,i+1:n) from the right
                    call stdlib${ii}$_clarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 &
                              ), lda, work )
                    call stdlib${ii}$_clacgv( n-i, a( i, i+1 ), lda )
                    a( i, i+1 ) = e( i )
                 else
                    taup( i ) = czero
                 end if
              end do
           else
              ! reduce to lower bidiagonal form
              do i = 1, m
                 ! generate elementary reflector g(i) to annihilate a(i,i+1:n)
                 call stdlib${ii}$_clacgv( n-i+1, a( i, i ), lda )
                 alpha = a( i, i )
                 call stdlib${ii}$_clarfg( n-i+1, alpha, a( i, min( i+1, n ) ), lda,taup( i ) )
                 d( i ) = real( alpha,KIND=sp)
                 a( i, i ) = cone
                 ! apply g(i) to a(i+1:m,i:n) from the right
                 if( i<m )call stdlib${ii}$_clarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+&
                           1_${ik}$, i ), lda, work )
                 call stdlib${ii}$_clacgv( n-i+1, a( i, i ), lda )
                 a( i, i ) = d( i )
                 if( i<m ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:m,i)
                    alpha = a( i+1, i )
                    call stdlib${ii}$_clarfg( m-i, alpha, a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) )
                    e( i ) = real( alpha,KIND=sp)
                    a( i+1, i ) = cone
                    ! apply h(i)**h to a(i+1:m,i+1:n) from the left
                    call stdlib${ii}$_clarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$,conjg( tauq( i ) ), a( i+&
                              1_${ik}$, i+1 ), lda,work )
                    a( i+1, i ) = e( i )
                 else
                    tauq( i ) = czero
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_cgebd2

     pure module subroutine stdlib${ii}$_zgebd2( m, n, a, lda, d, e, tauq, taup, work, info )
     !! ZGEBD2 reduces a complex general m by n matrix A to upper or lower
     !! real bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(out) :: d(*), e(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEBD2', -info )
              return
           end if
           if( m>=n ) then
              ! reduce to upper bidiagonal form
              do i = 1, n
                 ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
                 alpha = a( i, i )
                 call stdlib${ii}$_zlarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) )
                 d( i ) = real( alpha,KIND=dp)
                 a( i, i ) = cone
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 if( i<n )call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tauq( i ) ), &
                           a( i, i+1 ), lda, work )
                 a( i, i ) = d( i )
                 if( i<n ) then
                    ! generate elementary reflector g(i) to annihilate
                    ! a(i,i+2:n)
                    call stdlib${ii}$_zlacgv( n-i, a( i, i+1 ), lda )
                    alpha = a( i, i+1 )
                    call stdlib${ii}$_zlarfg( n-i, alpha, a( i, min( i+2, n ) ), lda,taup( i ) )
                    e( i ) = real( alpha,KIND=dp)
                    a( i, i+1 ) = cone
                    ! apply g(i) to a(i+1:m,i+1:n) from the right
                    call stdlib${ii}$_zlarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 &
                              ), lda, work )
                    call stdlib${ii}$_zlacgv( n-i, a( i, i+1 ), lda )
                    a( i, i+1 ) = e( i )
                 else
                    taup( i ) = czero
                 end if
              end do
           else
              ! reduce to lower bidiagonal form
              do i = 1, m
                 ! generate elementary reflector g(i) to annihilate a(i,i+1:n)
                 call stdlib${ii}$_zlacgv( n-i+1, a( i, i ), lda )
                 alpha = a( i, i )
                 call stdlib${ii}$_zlarfg( n-i+1, alpha, a( i, min( i+1, n ) ), lda,taup( i ) )
                 d( i ) = real( alpha,KIND=dp)
                 a( i, i ) = cone
                 ! apply g(i) to a(i+1:m,i:n) from the right
                 if( i<m )call stdlib${ii}$_zlarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+&
                           1_${ik}$, i ), lda, work )
                 call stdlib${ii}$_zlacgv( n-i+1, a( i, i ), lda )
                 a( i, i ) = d( i )
                 if( i<m ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:m,i)
                    alpha = a( i+1, i )
                    call stdlib${ii}$_zlarfg( m-i, alpha, a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) )
                    e( i ) = real( alpha,KIND=dp)
                    a( i+1, i ) = cone
                    ! apply h(i)**h to a(i+1:m,i+1:n) from the left
                    call stdlib${ii}$_zlarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$,conjg( tauq( i ) ), a( i+&
                              1_${ik}$, i+1 ), lda,work )
                    a( i+1, i ) = e( i )
                 else
                    tauq( i ) = czero
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_zgebd2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gebd2( m, n, a, lda, d, e, tauq, taup, work, info )
     !! ZGEBD2: reduces a complex general m by n matrix A to upper or lower
     !! real bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${ck}$), intent(out) :: d(*), e(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: taup(*), tauq(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info<0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEBD2', -info )
              return
           end if
           if( m>=n ) then
              ! reduce to upper bidiagonal form
              do i = 1, n
                 ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
                 alpha = a( i, i )
                 call stdlib${ii}$_${ci}$larfg( m-i+1, alpha, a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) )
                 d( i ) = real( alpha,KIND=${ck}$)
                 a( i, i ) = cone
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 if( i<n )call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tauq( i ) ), &
                           a( i, i+1 ), lda, work )
                 a( i, i ) = d( i )
                 if( i<n ) then
                    ! generate elementary reflector g(i) to annihilate
                    ! a(i,i+2:n)
                    call stdlib${ii}$_${ci}$lacgv( n-i, a( i, i+1 ), lda )
                    alpha = a( i, i+1 )
                    call stdlib${ii}$_${ci}$larfg( n-i, alpha, a( i, min( i+2, n ) ), lda,taup( i ) )
                    e( i ) = real( alpha,KIND=${ck}$)
                    a( i, i+1 ) = cone
                    ! apply g(i) to a(i+1:m,i+1:n) from the right
                    call stdlib${ii}$_${ci}$larf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 &
                              ), lda, work )
                    call stdlib${ii}$_${ci}$lacgv( n-i, a( i, i+1 ), lda )
                    a( i, i+1 ) = e( i )
                 else
                    taup( i ) = czero
                 end if
              end do
           else
              ! reduce to lower bidiagonal form
              do i = 1, m
                 ! generate elementary reflector g(i) to annihilate a(i,i+1:n)
                 call stdlib${ii}$_${ci}$lacgv( n-i+1, a( i, i ), lda )
                 alpha = a( i, i )
                 call stdlib${ii}$_${ci}$larfg( n-i+1, alpha, a( i, min( i+1, n ) ), lda,taup( i ) )
                 d( i ) = real( alpha,KIND=${ck}$)
                 a( i, i ) = cone
                 ! apply g(i) to a(i+1:m,i:n) from the right
                 if( i<m )call stdlib${ii}$_${ci}$larf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+&
                           1_${ik}$, i ), lda, work )
                 call stdlib${ii}$_${ci}$lacgv( n-i+1, a( i, i ), lda )
                 a( i, i ) = d( i )
                 if( i<m ) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:m,i)
                    alpha = a( i+1, i )
                    call stdlib${ii}$_${ci}$larfg( m-i, alpha, a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) )
                    e( i ) = real( alpha,KIND=${ck}$)
                    a( i+1, i ) = cone
                    ! apply h(i)**h to a(i+1:m,i+1:n) from the left
                    call stdlib${ii}$_${ci}$larf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$,conjg( tauq( i ) ), a( i+&
                              1_${ik}$, i+1 ), lda,work )
                    a( i+1, i ) = e( i )
                 else
                    tauq( i ) = czero
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ci}$gebd2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, &
     !! SGBBRD reduces a real general m-by-n band matrix A to upper
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! The routine computes B, and optionally forms Q or P**T, or computes
     !! Q**T*C for a given matrix C.
               ldc, work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc
           ! Array Arguments 
           real(sp), intent(inout) :: ab(ldab,*), c(ldc,*)
           real(sp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: wantb, wantc, wantpt, wantq
           integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,&
                      mu, mu0, nr, nrt
           real(sp) :: ra, rb, rc, rs
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantb = stdlib_lsame( vect, 'B' )
           wantq = stdlib_lsame( vect, 'Q' ) .or. wantb
           wantpt = stdlib_lsame( vect, 'P' ) .or. wantb
           wantc = ncc>0_${ik}$
           klu1 = kl + ku + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kl<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ku<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldab<klu1 ) then
              info = -8_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then
              info = -14_${ik}$
           else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGBBRD', -info )
              return
           end if
           ! initialize q and p**t to the unit matrix, if needed
           if( wantq )call stdlib${ii}$_slaset( 'FULL', m, m, zero, one, q, ldq )
           if( wantpt )call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, pt, ldpt )
           ! quick return if possible.
           if( m==0 .or. n==0 )return
           minmn = min( m, n )
           if( kl+ku>1_${ik}$ ) then
              ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce
              ! first to lower bidiagonal form and then transform to upper
              ! bidiagonal
              if( ku>0_${ik}$ ) then
                 ml0 = 1_${ik}$
                 mu0 = 2_${ik}$
              else
                 ml0 = 2_${ik}$
                 mu0 = 1_${ik}$
              end if
              ! wherever possible, plane rotations are generated and applied in
              ! vector operations of length nr over the index set j1:j2:klu1.
              ! the sines of the plane rotations are stored in work(1:max(m,n))
              ! and the cosines in work(max(m,n)+1:2*max(m,n)).
              mn = max( m, n )
              klm = min( m-1, kl )
              kun = min( n-1, ku )
              kb = klm + kun
              kb1 = kb + 1_${ik}$
              inca = kb1*ldab
              nr = 0_${ik}$
              j1 = klm + 2_${ik}$
              j2 = 1_${ik}$ - kun
              loop_90: do i = 1, minmn
                 ! reduce i-th column and i-th row of matrix to bidiagonal form
                 ml = klm + 1_${ik}$
                 mu = kun + 1_${ik}$
                 loop_80: do kk = 1, kb
                    j1 = j1 + kb
                    j2 = j2 + kb
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been created below the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_slargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, &
                              work( mn+j1 ), kb1 )
                    ! apply plane rotations from the left
                    do l = 1, kb
                       if( j2-klm+l-1>n ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( &
                                 klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 )
                    end do
                    if( ml>ml0 ) then
                       if( ml<=m-i+1 ) then
                          ! generate plane rotation to annihilate a(i+ml-1,i)
                          ! within the band, and apply rotation from the left
                          call stdlib${ii}$_slartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), &
                                    work( i+ml-1 ),ra )
                          ab( ku+ml-1, i ) = ra
                          if( i<n )call stdlib${ii}$_srot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-&
                                    1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,work( mn+i+ml-1 ), work( i+ml-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantq ) then
                       ! accumulate product of plane rotations in q
                       do j = j1, j2, kb1
                          call stdlib${ii}$_srot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,work( mn+j ), work( j &
                                    ) )
                       end do
                    end if
                    if( wantc ) then
                       ! apply plane rotations to c
                       do j = j1, j2, kb1
                          call stdlib${ii}$_srot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,work( mn+j ), &
                                    work( j ) )
                       end do
                    end if
                    if( j2+kun>n ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j-1,j+ku) above the band
                       ! and store it in work(n+1:2*n)
                       work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun )
                       ab( 1_${ik}$, j+kun ) = work( mn+j )*ab( 1_${ik}$, j+kun )
                    end do
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been generated above the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_slargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,&
                               work( mn+j1+kun ),kb1 )
                    ! apply plane rotations from the right
                    do l = 1, kb
                       if( j2+l-1>m ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+&
                                 kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 )
                    end do
                    if( ml==ml0 .and. mu>mu0 ) then
                       if( mu<=n-i+1 ) then
                          ! generate plane rotation to annihilate a(i,i+mu-1)
                          ! within the band, and apply rotation from the right
                          call stdlib${ii}$_slartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( &
                                    mn+i+mu-1 ), work( i+mu-1 ),ra )
                          ab( ku-mu+3, i+mu-2 ) = ra
                          call stdlib${ii}$_srot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-&
                                    mu+3, i+mu-1 ), 1_${ik}$,work( mn+i+mu-1 ), work( i+mu-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantpt ) then
                       ! accumulate product of plane rotations in p**t
                       do j = j1, j2, kb1
                          call stdlib${ii}$_srot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, work( &
                                    mn+j+kun ),work( j+kun ) )
                       end do
                    end if
                    if( j2+kb>m ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j+kl+ku,j+ku-1) below the
                       ! band and store it in work(1:n)
                       work( j+kb ) = work( j+kun )*ab( klu1, j+kun )
                       ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun )
                    end do
                    if( ml>ml0 ) then
                       ml = ml - 1_${ik}$
                    else
                       mu = mu - 1_${ik}$
                    end if
                 end do loop_80
              end do loop_90
           end if
           if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then
              ! a has been reduced to lower bidiagonal form
              ! transform lower bidiagonal form to upper bidiagonal by applying
              ! plane rotations from the left, storing diagonal elements in d
              ! and off-diagonal elements in e
              do i = 1, min( m-1, n )
                 call stdlib${ii}$_slartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra )
                 d( i ) = ra
                 if( i<n ) then
                    e( i ) = rs*ab( 1_${ik}$, i+1 )
                    ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 )
                 end if
                 if( wantq )call stdlib${ii}$_srot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc, rs )
                 if( wantc )call stdlib${ii}$_srot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs )
                           
              end do
              if( m<=n )d( m ) = ab( 1_${ik}$, m )
           else if( ku>0_${ik}$ ) then
              ! a has been reduced to upper bidiagonal form
              if( m<n ) then
                 ! annihilate a(m,m+1) by applying plane rotations from the
                 ! right, storing diagonal elements in d and off-diagonal
                 ! elements in e
                 rb = ab( ku, m+1 )
                 do i = m, 1, -1
                    call stdlib${ii}$_slartg( ab( ku+1, i ), rb, rc, rs, ra )
                    d( i ) = ra
                    if( i>1_${ik}$ ) then
                       rb = -rs*ab( ku, i )
                       e( i-1 ) = rc*ab( ku, i )
                    end if
                    if( wantpt )call stdlib${ii}$_srot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, rs )
                              
                 end do
              else
                 ! copy off-diagonal elements to e and diagonal elements to d
                 do i = 1, minmn - 1
                    e( i ) = ab( ku, i+1 )
                 end do
                 do i = 1, minmn
                    d( i ) = ab( ku+1, i )
                 end do
              end if
           else
              ! a is diagonal. set elements of e to zero and copy diagonal
              ! elements to d.
              do i = 1, minmn - 1
                 e( i ) = zero
              end do
              do i = 1, minmn
                 d( i ) = ab( 1_${ik}$, i )
              end do
           end if
           return
     end subroutine stdlib${ii}$_sgbbrd

     pure module subroutine stdlib${ii}$_dgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, &
     !! DGBBRD reduces a real general m-by-n band matrix A to upper
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! The routine computes B, and optionally forms Q or P**T, or computes
     !! Q**T*C for a given matrix C.
               ldc, work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc
           ! Array Arguments 
           real(dp), intent(inout) :: ab(ldab,*), c(ldc,*)
           real(dp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: wantb, wantc, wantpt, wantq
           integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,&
                      mu, mu0, nr, nrt
           real(dp) :: ra, rb, rc, rs
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantb = stdlib_lsame( vect, 'B' )
           wantq = stdlib_lsame( vect, 'Q' ) .or. wantb
           wantpt = stdlib_lsame( vect, 'P' ) .or. wantb
           wantc = ncc>0_${ik}$
           klu1 = kl + ku + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kl<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ku<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldab<klu1 ) then
              info = -8_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then
              info = -14_${ik}$
           else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGBBRD', -info )
              return
           end if
           ! initialize q and p**t to the unit matrix, if needed
           if( wantq )call stdlib${ii}$_dlaset( 'FULL', m, m, zero, one, q, ldq )
           if( wantpt )call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, pt, ldpt )
           ! quick return if possible.
           if( m==0 .or. n==0 )return
           minmn = min( m, n )
           if( kl+ku>1_${ik}$ ) then
              ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce
              ! first to lower bidiagonal form and then transform to upper
              ! bidiagonal
              if( ku>0_${ik}$ ) then
                 ml0 = 1_${ik}$
                 mu0 = 2_${ik}$
              else
                 ml0 = 2_${ik}$
                 mu0 = 1_${ik}$
              end if
              ! wherever possible, plane rotations are generated and applied in
              ! vector operations of length nr over the index set j1:j2:klu1.
              ! the sines of the plane rotations are stored in work(1:max(m,n))
              ! and the cosines in work(max(m,n)+1:2*max(m,n)).
              mn = max( m, n )
              klm = min( m-1, kl )
              kun = min( n-1, ku )
              kb = klm + kun
              kb1 = kb + 1_${ik}$
              inca = kb1*ldab
              nr = 0_${ik}$
              j1 = klm + 2_${ik}$
              j2 = 1_${ik}$ - kun
              loop_90: do i = 1, minmn
                 ! reduce i-th column and i-th row of matrix to bidiagonal form
                 ml = klm + 1_${ik}$
                 mu = kun + 1_${ik}$
                 loop_80: do kk = 1, kb
                    j1 = j1 + kb
                    j2 = j2 + kb
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been created below the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_dlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, &
                              work( mn+j1 ), kb1 )
                    ! apply plane rotations from the left
                    do l = 1, kb
                       if( j2-klm+l-1>n ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( &
                                 klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 )
                    end do
                    if( ml>ml0 ) then
                       if( ml<=m-i+1 ) then
                          ! generate plane rotation to annihilate a(i+ml-1,i)
                          ! within the band, and apply rotation from the left
                          call stdlib${ii}$_dlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), &
                                    work( i+ml-1 ),ra )
                          ab( ku+ml-1, i ) = ra
                          if( i<n )call stdlib${ii}$_drot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-&
                                    1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,work( mn+i+ml-1 ), work( i+ml-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantq ) then
                       ! accumulate product of plane rotations in q
                       do j = j1, j2, kb1
                          call stdlib${ii}$_drot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,work( mn+j ), work( j &
                                    ) )
                       end do
                    end if
                    if( wantc ) then
                       ! apply plane rotations to c
                       do j = j1, j2, kb1
                          call stdlib${ii}$_drot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,work( mn+j ), &
                                    work( j ) )
                       end do
                    end if
                    if( j2+kun>n ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j-1,j+ku) above the band
                       ! and store it in work(n+1:2*n)
                       work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun )
                       ab( 1_${ik}$, j+kun ) = work( mn+j )*ab( 1_${ik}$, j+kun )
                    end do
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been generated above the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_dlargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,&
                               work( mn+j1+kun ),kb1 )
                    ! apply plane rotations from the right
                    do l = 1, kb
                       if( j2+l-1>m ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+&
                                 kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 )
                    end do
                    if( ml==ml0 .and. mu>mu0 ) then
                       if( mu<=n-i+1 ) then
                          ! generate plane rotation to annihilate a(i,i+mu-1)
                          ! within the band, and apply rotation from the right
                          call stdlib${ii}$_dlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( &
                                    mn+i+mu-1 ), work( i+mu-1 ),ra )
                          ab( ku-mu+3, i+mu-2 ) = ra
                          call stdlib${ii}$_drot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-&
                                    mu+3, i+mu-1 ), 1_${ik}$,work( mn+i+mu-1 ), work( i+mu-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantpt ) then
                       ! accumulate product of plane rotations in p**t
                       do j = j1, j2, kb1
                          call stdlib${ii}$_drot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, work( &
                                    mn+j+kun ),work( j+kun ) )
                       end do
                    end if
                    if( j2+kb>m ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j+kl+ku,j+ku-1) below the
                       ! band and store it in work(1:n)
                       work( j+kb ) = work( j+kun )*ab( klu1, j+kun )
                       ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun )
                    end do
                    if( ml>ml0 ) then
                       ml = ml - 1_${ik}$
                    else
                       mu = mu - 1_${ik}$
                    end if
                 end do loop_80
              end do loop_90
           end if
           if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then
              ! a has been reduced to lower bidiagonal form
              ! transform lower bidiagonal form to upper bidiagonal by applying
              ! plane rotations from the left, storing diagonal elements in d
              ! and off-diagonal elements in e
              do i = 1, min( m-1, n )
                 call stdlib${ii}$_dlartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra )
                 d( i ) = ra
                 if( i<n ) then
                    e( i ) = rs*ab( 1_${ik}$, i+1 )
                    ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 )
                 end if
                 if( wantq )call stdlib${ii}$_drot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc, rs )
                 if( wantc )call stdlib${ii}$_drot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs )
                           
              end do
              if( m<=n )d( m ) = ab( 1_${ik}$, m )
           else if( ku>0_${ik}$ ) then
              ! a has been reduced to upper bidiagonal form
              if( m<n ) then
                 ! annihilate a(m,m+1) by applying plane rotations from the
                 ! right, storing diagonal elements in d and off-diagonal
                 ! elements in e
                 rb = ab( ku, m+1 )
                 do i = m, 1, -1
                    call stdlib${ii}$_dlartg( ab( ku+1, i ), rb, rc, rs, ra )
                    d( i ) = ra
                    if( i>1_${ik}$ ) then
                       rb = -rs*ab( ku, i )
                       e( i-1 ) = rc*ab( ku, i )
                    end if
                    if( wantpt )call stdlib${ii}$_drot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, rs )
                              
                 end do
              else
                 ! copy off-diagonal elements to e and diagonal elements to d
                 do i = 1, minmn - 1
                    e( i ) = ab( ku, i+1 )
                 end do
                 do i = 1, minmn
                    d( i ) = ab( ku+1, i )
                 end do
              end if
           else
              ! a is diagonal. set elements of e to zero and copy diagonal
              ! elements to d.
              do i = 1, minmn - 1
                 e( i ) = zero
              end do
              do i = 1, minmn
                 d( i ) = ab( 1_${ik}$, i )
              end do
           end if
           return
     end subroutine stdlib${ii}$_dgbbrd

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, &
     !! DGBBRD: reduces a real general m-by-n band matrix A to upper
     !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
     !! The routine computes B, and optionally forms Q or P**T, or computes
     !! Q**T*C for a given matrix C.
               ldc, work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc
           ! Array Arguments 
           real(${rk}$), intent(inout) :: ab(ldab,*), c(ldc,*)
           real(${rk}$), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: wantb, wantc, wantpt, wantq
           integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,&
                      mu, mu0, nr, nrt
           real(${rk}$) :: ra, rb, rc, rs
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantb = stdlib_lsame( vect, 'B' )
           wantq = stdlib_lsame( vect, 'Q' ) .or. wantb
           wantpt = stdlib_lsame( vect, 'P' ) .or. wantb
           wantc = ncc>0_${ik}$
           klu1 = kl + ku + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kl<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ku<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldab<klu1 ) then
              info = -8_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then
              info = -14_${ik}$
           else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGBBRD', -info )
              return
           end if
           ! initialize q and p**t to the unit matrix, if needed
           if( wantq )call stdlib${ii}$_${ri}$laset( 'FULL', m, m, zero, one, q, ldq )
           if( wantpt )call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, pt, ldpt )
           ! quick return if possible.
           if( m==0 .or. n==0 )return
           minmn = min( m, n )
           if( kl+ku>1_${ik}$ ) then
              ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce
              ! first to lower bidiagonal form and then transform to upper
              ! bidiagonal
              if( ku>0_${ik}$ ) then
                 ml0 = 1_${ik}$
                 mu0 = 2_${ik}$
              else
                 ml0 = 2_${ik}$
                 mu0 = 1_${ik}$
              end if
              ! wherever possible, plane rotations are generated and applied in
              ! vector operations of length nr over the index set j1:j2:klu1.
              ! the sines of the plane rotations are stored in work(1:max(m,n))
              ! and the cosines in work(max(m,n)+1:2*max(m,n)).
              mn = max( m, n )
              klm = min( m-1, kl )
              kun = min( n-1, ku )
              kb = klm + kun
              kb1 = kb + 1_${ik}$
              inca = kb1*ldab
              nr = 0_${ik}$
              j1 = klm + 2_${ik}$
              j2 = 1_${ik}$ - kun
              loop_90: do i = 1, minmn
                 ! reduce i-th column and i-th row of matrix to bidiagonal form
                 ml = klm + 1_${ik}$
                 mu = kun + 1_${ik}$
                 loop_80: do kk = 1, kb
                    j1 = j1 + kb
                    j2 = j2 + kb
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been created below the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, &
                              work( mn+j1 ), kb1 )
                    ! apply plane rotations from the left
                    do l = 1, kb
                       if( j2-klm+l-1>n ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( &
                                 klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 )
                    end do
                    if( ml>ml0 ) then
                       if( ml<=m-i+1 ) then
                          ! generate plane rotation to annihilate a(i+ml-1,i)
                          ! within the band, and apply rotation from the left
                          call stdlib${ii}$_${ri}$lartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), &
                                    work( i+ml-1 ),ra )
                          ab( ku+ml-1, i ) = ra
                          if( i<n )call stdlib${ii}$_${ri}$rot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-&
                                    1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,work( mn+i+ml-1 ), work( i+ml-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantq ) then
                       ! accumulate product of plane rotations in q
                       do j = j1, j2, kb1
                          call stdlib${ii}$_${ri}$rot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,work( mn+j ), work( j &
                                    ) )
                       end do
                    end if
                    if( wantc ) then
                       ! apply plane rotations to c
                       do j = j1, j2, kb1
                          call stdlib${ii}$_${ri}$rot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,work( mn+j ), &
                                    work( j ) )
                       end do
                    end if
                    if( j2+kun>n ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j-1,j+ku) above the band
                       ! and store it in work(n+1:2*n)
                       work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun )
                       ab( 1_${ik}$, j+kun ) = work( mn+j )*ab( 1_${ik}$, j+kun )
                    end do
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been generated above the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,&
                               work( mn+j1+kun ),kb1 )
                    ! apply plane rotations from the right
                    do l = 1, kb
                       if( j2+l-1>m ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+&
                                 kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 )
                    end do
                    if( ml==ml0 .and. mu>mu0 ) then
                       if( mu<=n-i+1 ) then
                          ! generate plane rotation to annihilate a(i,i+mu-1)
                          ! within the band, and apply rotation from the right
                          call stdlib${ii}$_${ri}$lartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( &
                                    mn+i+mu-1 ), work( i+mu-1 ),ra )
                          ab( ku-mu+3, i+mu-2 ) = ra
                          call stdlib${ii}$_${ri}$rot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-&
                                    mu+3, i+mu-1 ), 1_${ik}$,work( mn+i+mu-1 ), work( i+mu-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantpt ) then
                       ! accumulate product of plane rotations in p**t
                       do j = j1, j2, kb1
                          call stdlib${ii}$_${ri}$rot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, work( &
                                    mn+j+kun ),work( j+kun ) )
                       end do
                    end if
                    if( j2+kb>m ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j+kl+ku,j+ku-1) below the
                       ! band and store it in work(1:n)
                       work( j+kb ) = work( j+kun )*ab( klu1, j+kun )
                       ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun )
                    end do
                    if( ml>ml0 ) then
                       ml = ml - 1_${ik}$
                    else
                       mu = mu - 1_${ik}$
                    end if
                 end do loop_80
              end do loop_90
           end if
           if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then
              ! a has been reduced to lower bidiagonal form
              ! transform lower bidiagonal form to upper bidiagonal by applying
              ! plane rotations from the left, storing diagonal elements in d
              ! and off-diagonal elements in e
              do i = 1, min( m-1, n )
                 call stdlib${ii}$_${ri}$lartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra )
                 d( i ) = ra
                 if( i<n ) then
                    e( i ) = rs*ab( 1_${ik}$, i+1 )
                    ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 )
                 end if
                 if( wantq )call stdlib${ii}$_${ri}$rot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc, rs )
                 if( wantc )call stdlib${ii}$_${ri}$rot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs )
                           
              end do
              if( m<=n )d( m ) = ab( 1_${ik}$, m )
           else if( ku>0_${ik}$ ) then
              ! a has been reduced to upper bidiagonal form
              if( m<n ) then
                 ! annihilate a(m,m+1) by applying plane rotations from the
                 ! right, storing diagonal elements in d and off-diagonal
                 ! elements in e
                 rb = ab( ku, m+1 )
                 do i = m, 1, -1
                    call stdlib${ii}$_${ri}$lartg( ab( ku+1, i ), rb, rc, rs, ra )
                    d( i ) = ra
                    if( i>1_${ik}$ ) then
                       rb = -rs*ab( ku, i )
                       e( i-1 ) = rc*ab( ku, i )
                    end if
                    if( wantpt )call stdlib${ii}$_${ri}$rot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, rs )
                              
                 end do
              else
                 ! copy off-diagonal elements to e and diagonal elements to d
                 do i = 1, minmn - 1
                    e( i ) = ab( ku, i+1 )
                 end do
                 do i = 1, minmn
                    d( i ) = ab( ku+1, i )
                 end do
              end if
           else
              ! a is diagonal. set elements of e to zero and copy diagonal
              ! elements to d.
              do i = 1, minmn - 1
                 e( i ) = zero
              end do
              do i = 1, minmn
                 d( i ) = ab( 1_${ik}$, i )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$gbbrd

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, &
     !! CGBBRD reduces a complex general m-by-n band matrix A to real upper
     !! bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! The routine computes B, and optionally forms Q or P**H, or computes
     !! Q**H*C for a given matrix C.
               ldc, work, rwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc
           ! Array Arguments 
           real(sp), intent(out) :: d(*), e(*), rwork(*)
           complex(sp), intent(inout) :: ab(ldab,*), c(ldc,*)
           complex(sp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: wantb, wantc, wantpt, wantq
           integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,&
                      mu0, nr, nrt
           real(sp) :: abst, rc
           complex(sp) :: ra, rb, rs, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantb = stdlib_lsame( vect, 'B' )
           wantq = stdlib_lsame( vect, 'Q' ) .or. wantb
           wantpt = stdlib_lsame( vect, 'P' ) .or. wantb
           wantc = ncc>0_${ik}$
           klu1 = kl + ku + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kl<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ku<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldab<klu1 ) then
              info = -8_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then
              info = -14_${ik}$
           else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGBBRD', -info )
              return
           end if
           ! initialize q and p**h to the unit matrix, if needed
           if( wantq )call stdlib${ii}$_claset( 'FULL', m, m, czero, cone, q, ldq )
           if( wantpt )call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, pt, ldpt )
           ! quick return if possible.
           if( m==0 .or. n==0 )return
           minmn = min( m, n )
           if( kl+ku>1_${ik}$ ) then
              ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce
              ! first to lower bidiagonal form and then transform to upper
              ! bidiagonal
              if( ku>0_${ik}$ ) then
                 ml0 = 1_${ik}$
                 mu0 = 2_${ik}$
              else
                 ml0 = 2_${ik}$
                 mu0 = 1_${ik}$
              end if
              ! wherever possible, plane rotations are generated and applied in
              ! vector operations of length nr over the index set j1:j2:klu1.
              ! the complex sines of the plane rotations are stored in work,
              ! and the real cosines in rwork.
              klm = min( m-1, kl )
              kun = min( n-1, ku )
              kb = klm + kun
              kb1 = kb + 1_${ik}$
              inca = kb1*ldab
              nr = 0_${ik}$
              j1 = klm + 2_${ik}$
              j2 = 1_${ik}$ - kun
              loop_90: do i = 1, minmn
                 ! reduce i-th column and i-th row of matrix to bidiagonal form
                 ml = klm + 1_${ik}$
                 mu = kun + 1_${ik}$
                 loop_80: do kk = 1, kb
                    j1 = j1 + kb
                    j2 = j2 + kb
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been created below the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_clargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, &
                              rwork( j1 ), kb1 )
                    ! apply plane rotations from the left
                    do l = 1, kb
                       if( j2-klm+l-1>n ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( &
                                 klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 )
                    end do
                    if( ml>ml0 ) then
                       if( ml<=m-i+1 ) then
                          ! generate plane rotation to annihilate a(i+ml-1,i)
                          ! within the band, and apply rotation from the left
                          call stdlib${ii}$_clartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), &
                                    work( i+ml-1 ), ra )
                          ab( ku+ml-1, i ) = ra
                          if( i<n )call stdlib${ii}$_crot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-&
                                    1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,rwork( i+ml-1 ), work( i+ml-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantq ) then
                       ! accumulate product of plane rotations in q
                       do j = j1, j2, kb1
                          call stdlib${ii}$_crot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,rwork( j ), conjg( &
                                    work( j ) ) )
                       end do
                    end if
                    if( wantc ) then
                       ! apply plane rotations to c
                       do j = j1, j2, kb1
                          call stdlib${ii}$_crot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,rwork( j ), &
                                    work( j ) )
                       end do
                    end if
                    if( j2+kun>n ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j-1,j+ku) above the band
                       ! and store it in work(n+1:2*n)
                       work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun )
                       ab( 1_${ik}$, j+kun ) = rwork( j )*ab( 1_${ik}$, j+kun )
                    end do
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been generated above the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_clargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,&
                               rwork( j1+kun ),kb1 )
                    ! apply plane rotations from the right
                    do l = 1, kb
                       if( j2+l-1>m ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+&
                                 kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 )
                    end do
                    if( ml==ml0 .and. mu>mu0 ) then
                       if( mu<=n-i+1 ) then
                          ! generate plane rotation to annihilate a(i,i+mu-1)
                          ! within the band, and apply rotation from the right
                          call stdlib${ii}$_clartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( &
                                    i+mu-1 ), work( i+mu-1 ), ra )
                          ab( ku-mu+3, i+mu-2 ) = ra
                          call stdlib${ii}$_crot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-&
                                    mu+3, i+mu-1 ), 1_${ik}$,rwork( i+mu-1 ), work( i+mu-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantpt ) then
                       ! accumulate product of plane rotations in p**h
                       do j = j1, j2, kb1
                          call stdlib${ii}$_crot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, rwork(&
                                     j+kun ),conjg( work( j+kun ) ) )
                       end do
                    end if
                    if( j2+kb>m ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j+kl+ku,j+ku-1) below the
                       ! band and store it in work(1:n)
                       work( j+kb ) = work( j+kun )*ab( klu1, j+kun )
                       ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun )
                    end do
                    if( ml>ml0 ) then
                       ml = ml - 1_${ik}$
                    else
                       mu = mu - 1_${ik}$
                    end if
                 end do loop_80
              end do loop_90
           end if
           if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then
              ! a has been reduced to complex lower bidiagonal form
              ! transform lower bidiagonal form to upper bidiagonal by applying
              ! plane rotations from the left, overwriting superdiagonal
              ! elements on subdiagonal elements
              do i = 1, min( m-1, n )
                 call stdlib${ii}$_clartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra )
                 ab( 1_${ik}$, i ) = ra
                 if( i<n ) then
                    ab( 2_${ik}$, i ) = rs*ab( 1_${ik}$, i+1 )
                    ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 )
                 end if
                 if( wantq )call stdlib${ii}$_crot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc,conjg( rs ) )
                           
                 if( wantc )call stdlib${ii}$_crot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs )
                           
              end do
           else
              ! a has been reduced to complex upper bidiagonal form or is
              ! diagonal
              if( ku>0_${ik}$ .and. m<n ) then
                 ! annihilate a(m,m+1) by applying plane rotations from the
                 ! right
                 rb = ab( ku, m+1 )
                 do i = m, 1, -1
                    call stdlib${ii}$_clartg( ab( ku+1, i ), rb, rc, rs, ra )
                    ab( ku+1, i ) = ra
                    if( i>1_${ik}$ ) then
                       rb = -conjg( rs )*ab( ku, i )
                       ab( ku, i ) = rc*ab( ku, i )
                    end if
                    if( wantpt )call stdlib${ii}$_crot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, &
                              conjg( rs ) )
                 end do
              end if
           end if
           ! make diagonal and superdiagonal elements real, storing them in d
           ! and e
           t = ab( ku+1, 1_${ik}$ )
           loop_120: do i = 1, minmn
              abst = abs( t )
              d( i ) = abst
              if( abst/=zero ) then
                 t = t / abst
              else
                 t = cone
              end if
              if( wantq )call stdlib${ii}$_cscal( m, t, q( 1_${ik}$, i ), 1_${ik}$ )
              if( wantc )call stdlib${ii}$_cscal( ncc, conjg( t ), c( i, 1_${ik}$ ), ldc )
              if( i<minmn ) then
                 if( ku==0_${ik}$ .and. kl==0_${ik}$ ) then
                    e( i ) = zero
                    t = ab( 1_${ik}$, i+1 )
                 else
                    if( ku==0_${ik}$ ) then
                       t = ab( 2_${ik}$, i )*conjg( t )
                    else
                       t = ab( ku, i+1 )*conjg( t )
                    end if
                    abst = abs( t )
                    e( i ) = abst
                    if( abst/=zero ) then
                       t = t / abst
                    else
                       t = cone
                    end if
                    if( wantpt )call stdlib${ii}$_cscal( n, t, pt( i+1, 1_${ik}$ ), ldpt )
                    t = ab( ku+1, i+1 )*conjg( t )
                 end if
              end if
           end do loop_120
           return
     end subroutine stdlib${ii}$_cgbbrd

     pure module subroutine stdlib${ii}$_zgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, &
     !! ZGBBRD reduces a complex general m-by-n band matrix A to real upper
     !! bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! The routine computes B, and optionally forms Q or P**H, or computes
     !! Q**H*C for a given matrix C.
               ldc, work, rwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc
           ! Array Arguments 
           real(dp), intent(out) :: d(*), e(*), rwork(*)
           complex(dp), intent(inout) :: ab(ldab,*), c(ldc,*)
           complex(dp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: wantb, wantc, wantpt, wantq
           integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,&
                      mu0, nr, nrt
           real(dp) :: abst, rc
           complex(dp) :: ra, rb, rs, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantb = stdlib_lsame( vect, 'B' )
           wantq = stdlib_lsame( vect, 'Q' ) .or. wantb
           wantpt = stdlib_lsame( vect, 'P' ) .or. wantb
           wantc = ncc>0_${ik}$
           klu1 = kl + ku + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kl<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ku<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldab<klu1 ) then
              info = -8_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then
              info = -14_${ik}$
           else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGBBRD', -info )
              return
           end if
           ! initialize q and p**h to the unit matrix, if needed
           if( wantq )call stdlib${ii}$_zlaset( 'FULL', m, m, czero, cone, q, ldq )
           if( wantpt )call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, pt, ldpt )
           ! quick return if possible.
           if( m==0 .or. n==0 )return
           minmn = min( m, n )
           if( kl+ku>1_${ik}$ ) then
              ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce
              ! first to lower bidiagonal form and then transform to upper
              ! bidiagonal
              if( ku>0_${ik}$ ) then
                 ml0 = 1_${ik}$
                 mu0 = 2_${ik}$
              else
                 ml0 = 2_${ik}$
                 mu0 = 1_${ik}$
              end if
              ! wherever possible, plane rotations are generated and applied in
              ! vector operations of length nr over the index set j1:j2:klu1.
              ! the complex sines of the plane rotations are stored in work,
              ! and the real cosines in rwork.
              klm = min( m-1, kl )
              kun = min( n-1, ku )
              kb = klm + kun
              kb1 = kb + 1_${ik}$
              inca = kb1*ldab
              nr = 0_${ik}$
              j1 = klm + 2_${ik}$
              j2 = 1_${ik}$ - kun
              loop_90: do i = 1, minmn
                 ! reduce i-th column and i-th row of matrix to bidiagonal form
                 ml = klm + 1_${ik}$
                 mu = kun + 1_${ik}$
                 loop_80: do kk = 1, kb
                    j1 = j1 + kb
                    j2 = j2 + kb
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been created below the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_zlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, &
                              rwork( j1 ), kb1 )
                    ! apply plane rotations from the left
                    do l = 1, kb
                       if( j2-klm+l-1>n ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( &
                                 klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 )
                    end do
                    if( ml>ml0 ) then
                       if( ml<=m-i+1 ) then
                          ! generate plane rotation to annihilate a(i+ml-1,i)
                          ! within the band, and apply rotation from the left
                          call stdlib${ii}$_zlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), &
                                    work( i+ml-1 ), ra )
                          ab( ku+ml-1, i ) = ra
                          if( i<n )call stdlib${ii}$_zrot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-&
                                    1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,rwork( i+ml-1 ), work( i+ml-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantq ) then
                       ! accumulate product of plane rotations in q
                       do j = j1, j2, kb1
                          call stdlib${ii}$_zrot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,rwork( j ), conjg( &
                                    work( j ) ) )
                       end do
                    end if
                    if( wantc ) then
                       ! apply plane rotations to c
                       do j = j1, j2, kb1
                          call stdlib${ii}$_zrot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,rwork( j ), &
                                    work( j ) )
                       end do
                    end if
                    if( j2+kun>n ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j-1,j+ku) above the band
                       ! and store it in work(n+1:2*n)
                       work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun )
                       ab( 1_${ik}$, j+kun ) = rwork( j )*ab( 1_${ik}$, j+kun )
                    end do
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been generated above the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_zlargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,&
                               rwork( j1+kun ),kb1 )
                    ! apply plane rotations from the right
                    do l = 1, kb
                       if( j2+l-1>m ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+&
                                 kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 )
                    end do
                    if( ml==ml0 .and. mu>mu0 ) then
                       if( mu<=n-i+1 ) then
                          ! generate plane rotation to annihilate a(i,i+mu-1)
                          ! within the band, and apply rotation from the right
                          call stdlib${ii}$_zlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( &
                                    i+mu-1 ), work( i+mu-1 ), ra )
                          ab( ku-mu+3, i+mu-2 ) = ra
                          call stdlib${ii}$_zrot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-&
                                    mu+3, i+mu-1 ), 1_${ik}$,rwork( i+mu-1 ), work( i+mu-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantpt ) then
                       ! accumulate product of plane rotations in p**h
                       do j = j1, j2, kb1
                          call stdlib${ii}$_zrot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, rwork(&
                                     j+kun ),conjg( work( j+kun ) ) )
                       end do
                    end if
                    if( j2+kb>m ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j+kl+ku,j+ku-1) below the
                       ! band and store it in work(1:n)
                       work( j+kb ) = work( j+kun )*ab( klu1, j+kun )
                       ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun )
                    end do
                    if( ml>ml0 ) then
                       ml = ml - 1_${ik}$
                    else
                       mu = mu - 1_${ik}$
                    end if
                 end do loop_80
              end do loop_90
           end if
           if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then
              ! a has been reduced to complex lower bidiagonal form
              ! transform lower bidiagonal form to upper bidiagonal by applying
              ! plane rotations from the left, overwriting superdiagonal
              ! elements on subdiagonal elements
              do i = 1, min( m-1, n )
                 call stdlib${ii}$_zlartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra )
                 ab( 1_${ik}$, i ) = ra
                 if( i<n ) then
                    ab( 2_${ik}$, i ) = rs*ab( 1_${ik}$, i+1 )
                    ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 )
                 end if
                 if( wantq )call stdlib${ii}$_zrot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc,conjg( rs ) )
                           
                 if( wantc )call stdlib${ii}$_zrot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs )
                           
              end do
           else
              ! a has been reduced to complex upper bidiagonal form or is
              ! diagonal
              if( ku>0_${ik}$ .and. m<n ) then
                 ! annihilate a(m,m+1) by applying plane rotations from the
                 ! right
                 rb = ab( ku, m+1 )
                 do i = m, 1, -1
                    call stdlib${ii}$_zlartg( ab( ku+1, i ), rb, rc, rs, ra )
                    ab( ku+1, i ) = ra
                    if( i>1_${ik}$ ) then
                       rb = -conjg( rs )*ab( ku, i )
                       ab( ku, i ) = rc*ab( ku, i )
                    end if
                    if( wantpt )call stdlib${ii}$_zrot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, &
                              conjg( rs ) )
                 end do
              end if
           end if
           ! make diagonal and superdiagonal elements real, storing them in d
           ! and e
           t = ab( ku+1, 1_${ik}$ )
           loop_120: do i = 1, minmn
              abst = abs( t )
              d( i ) = abst
              if( abst/=zero ) then
                 t = t / abst
              else
                 t = cone
              end if
              if( wantq )call stdlib${ii}$_zscal( m, t, q( 1_${ik}$, i ), 1_${ik}$ )
              if( wantc )call stdlib${ii}$_zscal( ncc, conjg( t ), c( i, 1_${ik}$ ), ldc )
              if( i<minmn ) then
                 if( ku==0_${ik}$ .and. kl==0_${ik}$ ) then
                    e( i ) = zero
                    t = ab( 1_${ik}$, i+1 )
                 else
                    if( ku==0_${ik}$ ) then
                       t = ab( 2_${ik}$, i )*conjg( t )
                    else
                       t = ab( ku, i+1 )*conjg( t )
                    end if
                    abst = abs( t )
                    e( i ) = abst
                    if( abst/=zero ) then
                       t = t / abst
                    else
                       t = cone
                    end if
                    if( wantpt )call stdlib${ii}$_zscal( n, t, pt( i+1, 1_${ik}$ ), ldpt )
                    t = ab( ku+1, i+1 )*conjg( t )
                 end if
              end if
           end do loop_120
           return
     end subroutine stdlib${ii}$_zgbbrd

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, &
     !! ZGBBRD: reduces a complex general m-by-n band matrix A to real upper
     !! bidiagonal form B by a unitary transformation: Q**H * A * P = B.
     !! The routine computes B, and optionally forms Q or P**H, or computes
     !! Q**H*C for a given matrix C.
               ldc, work, rwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc
           ! Array Arguments 
           real(${ck}$), intent(out) :: d(*), e(*), rwork(*)
           complex(${ck}$), intent(inout) :: ab(ldab,*), c(ldc,*)
           complex(${ck}$), intent(out) :: pt(ldpt,*), q(ldq,*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: wantb, wantc, wantpt, wantq
           integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,&
                      mu0, nr, nrt
           real(${ck}$) :: abst, rc
           complex(${ck}$) :: ra, rb, rs, t
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           wantb = stdlib_lsame( vect, 'B' )
           wantq = stdlib_lsame( vect, 'Q' ) .or. wantb
           wantpt = stdlib_lsame( vect, 'P' ) .or. wantb
           wantc = ncc>0_${ik}$
           klu1 = kl + ku + 1_${ik}$
           info = 0_${ik}$
           if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -4_${ik}$
           else if( kl<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ku<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ldab<klu1 ) then
              info = -8_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then
              info = -14_${ik}$
           else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGBBRD', -info )
              return
           end if
           ! initialize q and p**h to the unit matrix, if needed
           if( wantq )call stdlib${ii}$_${ci}$laset( 'FULL', m, m, czero, cone, q, ldq )
           if( wantpt )call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, pt, ldpt )
           ! quick return if possible.
           if( m==0 .or. n==0 )return
           minmn = min( m, n )
           if( kl+ku>1_${ik}$ ) then
              ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce
              ! first to lower bidiagonal form and then transform to upper
              ! bidiagonal
              if( ku>0_${ik}$ ) then
                 ml0 = 1_${ik}$
                 mu0 = 2_${ik}$
              else
                 ml0 = 2_${ik}$
                 mu0 = 1_${ik}$
              end if
              ! wherever possible, plane rotations are generated and applied in
              ! vector operations of length nr over the index set j1:j2:klu1.
              ! the complex sines of the plane rotations are stored in work,
              ! and the real cosines in rwork.
              klm = min( m-1, kl )
              kun = min( n-1, ku )
              kb = klm + kun
              kb1 = kb + 1_${ik}$
              inca = kb1*ldab
              nr = 0_${ik}$
              j1 = klm + 2_${ik}$
              j2 = 1_${ik}$ - kun
              loop_90: do i = 1, minmn
                 ! reduce i-th column and i-th row of matrix to bidiagonal form
                 ml = klm + 1_${ik}$
                 mu = kun + 1_${ik}$
                 loop_80: do kk = 1, kb
                    j1 = j1 + kb
                    j2 = j2 + kb
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been created below the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, &
                              rwork( j1 ), kb1 )
                    ! apply plane rotations from the left
                    do l = 1, kb
                       if( j2-klm+l-1>n ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( &
                                 klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 )
                    end do
                    if( ml>ml0 ) then
                       if( ml<=m-i+1 ) then
                          ! generate plane rotation to annihilate a(i+ml-1,i)
                          ! within the band, and apply rotation from the left
                          call stdlib${ii}$_${ci}$lartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), &
                                    work( i+ml-1 ), ra )
                          ab( ku+ml-1, i ) = ra
                          if( i<n )call stdlib${ii}$_${ci}$rot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-&
                                    1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,rwork( i+ml-1 ), work( i+ml-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantq ) then
                       ! accumulate product of plane rotations in q
                       do j = j1, j2, kb1
                          call stdlib${ii}$_${ci}$rot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,rwork( j ), conjg( &
                                    work( j ) ) )
                       end do
                    end if
                    if( wantc ) then
                       ! apply plane rotations to c
                       do j = j1, j2, kb1
                          call stdlib${ii}$_${ci}$rot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,rwork( j ), &
                                    work( j ) )
                       end do
                    end if
                    if( j2+kun>n ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j-1,j+ku) above the band
                       ! and store it in work(n+1:2*n)
                       work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun )
                       ab( 1_${ik}$, j+kun ) = rwork( j )*ab( 1_${ik}$, j+kun )
                    end do
                    ! generate plane rotations to annihilate nonzero elements
                    ! which have been generated above the band
                    if( nr>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,&
                               rwork( j1+kun ),kb1 )
                    ! apply plane rotations from the right
                    do l = 1, kb
                       if( j2+l-1>m ) then
                          nrt = nr - 1_${ik}$
                       else
                          nrt = nr
                       end if
                       if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+&
                                 kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 )
                    end do
                    if( ml==ml0 .and. mu>mu0 ) then
                       if( mu<=n-i+1 ) then
                          ! generate plane rotation to annihilate a(i,i+mu-1)
                          ! within the band, and apply rotation from the right
                          call stdlib${ii}$_${ci}$lartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( &
                                    i+mu-1 ), work( i+mu-1 ), ra )
                          ab( ku-mu+3, i+mu-2 ) = ra
                          call stdlib${ii}$_${ci}$rot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-&
                                    mu+3, i+mu-1 ), 1_${ik}$,rwork( i+mu-1 ), work( i+mu-1 ) )
                       end if
                       nr = nr + 1_${ik}$
                       j1 = j1 - kb1
                    end if
                    if( wantpt ) then
                       ! accumulate product of plane rotations in p**h
                       do j = j1, j2, kb1
                          call stdlib${ii}$_${ci}$rot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, rwork(&
                                     j+kun ),conjg( work( j+kun ) ) )
                       end do
                    end if
                    if( j2+kb>m ) then
                       ! adjust j2 to keep within the bounds of the matrix
                       nr = nr - 1_${ik}$
                       j2 = j2 - kb1
                    end if
                    do j = j1, j2, kb1
                       ! create nonzero element a(j+kl+ku,j+ku-1) below the
                       ! band and store it in work(1:n)
                       work( j+kb ) = work( j+kun )*ab( klu1, j+kun )
                       ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun )
                    end do
                    if( ml>ml0 ) then
                       ml = ml - 1_${ik}$
                    else
                       mu = mu - 1_${ik}$
                    end if
                 end do loop_80
              end do loop_90
           end if
           if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then
              ! a has been reduced to complex lower bidiagonal form
              ! transform lower bidiagonal form to upper bidiagonal by applying
              ! plane rotations from the left, overwriting superdiagonal
              ! elements on subdiagonal elements
              do i = 1, min( m-1, n )
                 call stdlib${ii}$_${ci}$lartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra )
                 ab( 1_${ik}$, i ) = ra
                 if( i<n ) then
                    ab( 2_${ik}$, i ) = rs*ab( 1_${ik}$, i+1 )
                    ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 )
                 end if
                 if( wantq )call stdlib${ii}$_${ci}$rot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc,conjg( rs ) )
                           
                 if( wantc )call stdlib${ii}$_${ci}$rot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs )
                           
              end do
           else
              ! a has been reduced to complex upper bidiagonal form or is
              ! diagonal
              if( ku>0_${ik}$ .and. m<n ) then
                 ! annihilate a(m,m+1) by applying plane rotations from the
                 ! right
                 rb = ab( ku, m+1 )
                 do i = m, 1, -1
                    call stdlib${ii}$_${ci}$lartg( ab( ku+1, i ), rb, rc, rs, ra )
                    ab( ku+1, i ) = ra
                    if( i>1_${ik}$ ) then
                       rb = -conjg( rs )*ab( ku, i )
                       ab( ku, i ) = rc*ab( ku, i )
                    end if
                    if( wantpt )call stdlib${ii}$_${ci}$rot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, &
                              conjg( rs ) )
                 end do
              end if
           end if
           ! make diagonal and superdiagonal elements real, storing them in d
           ! and e
           t = ab( ku+1, 1_${ik}$ )
           loop_120: do i = 1, minmn
              abst = abs( t )
              d( i ) = abst
              if( abst/=zero ) then
                 t = t / abst
              else
                 t = cone
              end if
              if( wantq )call stdlib${ii}$_${ci}$scal( m, t, q( 1_${ik}$, i ), 1_${ik}$ )
              if( wantc )call stdlib${ii}$_${ci}$scal( ncc, conjg( t ), c( i, 1_${ik}$ ), ldc )
              if( i<minmn ) then
                 if( ku==0_${ik}$ .and. kl==0_${ik}$ ) then
                    e( i ) = zero
                    t = ab( 1_${ik}$, i+1 )
                 else
                    if( ku==0_${ik}$ ) then
                       t = ab( 2_${ik}$, i )*conjg( t )
                    else
                       t = ab( ku, i+1 )*conjg( t )
                    end if
                    abst = abs( t )
                    e( i ) = abst
                    if( abst/=zero ) then
                       t = t / abst
                    else
                       t = cone
                    end if
                    if( wantpt )call stdlib${ii}$_${ci}$scal( n, t, pt( i+1, 1_${ik}$ ), ldpt )
                    t = ab( ku+1, i+1 )*conjg( t )
                 end if
              end if
           end do loop_120
           return
     end subroutine stdlib${ii}$_${ci}$gbbrd

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, &
     !! SGSVJ0 is called from SGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as SGESVJ does, but
     !! it does not check convergence (stopping criterion). Few tuning
     !! parameters (marked by [TP]) are available for the implementer.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep
           real(sp), intent(in) :: eps, sfmin, tol
           character, intent(in) :: jobv
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*)
           real(sp), intent(out) :: work(lwork)
        ! =====================================================================
           
           ! Local Scalars 
           real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, &
                     lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Local Arrays 
           real(sp) :: fastr(5_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( lda<m ) then
              info = -5_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -8_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then
              info = -10_${ik}$
           else if( tol<=eps ) then
              info = -13_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -14_${ik}$
           else if( lwork<m ) then
              info = -16_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGSVJ0', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! .. row-cyclic jacobi svd algorithm with column pivoting ..
           emptsw = ( n*( n-1 ) ) / 2_${ik}$
           notrot = 0_${ik}$
           fastr( 1_${ik}$ ) = zero
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_sgesvj. for sweeps i=1:swband the procedure
           ! ......
           kbl = min( 8_${ik}$, n )
      ! [tp] kbl is a tuning parameter that defines the tile size in the
           ! tiling of the p-q loops of pivot pairs. in general, an optimal
           ! value of kbl depends on the matrix dimensions and on the
           ! parameters of the computer's memory.
           nbl = n / kbl
           if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           lkahead = 1_${ik}$
      ! [tp] lkahead is a tuning parameter.
           swband = 0_${ik}$
           pskipped = 0_${ik}$
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
              loop_2000: do ibr = 1, nbl
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_1002: do ir1 = 0, min( lkahead, nbl-ibr )
                    igl = igl + ir1*kbl
                    loop_2001: do p = igl, min( igl+kbl-1, n-1 )
           ! .. de rijk's pivoting
                       q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
                       if( p/=q ) then
                          call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                          if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                          temp1 = sva( p )
                          sva( p ) = sva( q )
                          sva( q ) = temp1
                          temp1 = d( p )
                          d( p ) = d( q )
                          d( q ) = temp1
                       end if
                       if( ir1==0_${ik}$ ) then
              ! column norms are periodically updated by explicit
              ! norm computation.
              ! caveat:
              ! some blas implementations compute stdlib${ii}$_snrm2(m,a(1,p),1)
              ! as sqrt(stdlib${ii}$_sdot(m,a(1,p),1,a(1,p),1)), which may result in
              ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and
              ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold).
              ! hence, stdlib${ii}$_snrm2 cannot be trusted, not even in the case when
              ! the true norm is far from the under(over)flow boundaries.
              ! if properly implemented stdlib${ii}$_snrm2 is available, the if-then-else
              ! below should read "aapp = stdlib${ii}$_snrm2( m, a(1,p), 1 ) * d(p)".
                          if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then
                             sva( p ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                          else
                             temp1 = zero
                             aapp = one
                             call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp )
                             sva( p ) = temp1*sqrt( aapp )*d( p )
                          end if
                          aapp = sva( p )
                       else
                          aapp = sva( p )
                       end if
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2002: do q = p + 1, min( igl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
                                if( aaqq>=one ) then
                                   rotok = ( small*aapp )<=aaqq
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   rotok = aapp<=( aaqq / small )
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                 ! Rotate
                 ! rotated = rotated + one
                                   if( ir1==0_${ik}$ ) then
                                      notrot = 0_${ik}$
                                      pskipped = 0_${ik}$
                                      iswrot = iswrot + 1_${ik}$
                                   end if
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq ) / aapq
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                               if( rsvec ) then
                                                  call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                               if( rsvec ) then
                                                  call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda, &
                                                ierr )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      temp1 = -aapq*d( p ) / d( q )
                                      call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                      mxsinj = max( mxsinj, sfmin )
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! recompute sva(q), sva(p).
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                                else
              ! a(:,p) and a(:,q) already numerically orthogonal
                                   if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                   pskipped = pskipped + 1_${ik}$
                                end if
                             else
              ! a(:,q) is zero column
                                if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                if( ir1==0_${ik}$ )aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2103
                             end if
                          end do loop_2002
           ! end q-loop
           2103 continue
           ! bailed out of q-loop
                          sva( p ) = aapp
                       else
                          sva( p ) = aapp
                          if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, &
                                    n ) - p
                       end if
                    end do loop_2001
           ! end of the p-loop
           ! end of doing the block ( ibr, ibr )
                 end do loop_1002
           ! end of ir1-loop
      ! ........................................................
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = ibr + 1, nbl
                    jgl = ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! Safe Gram Matrix Computation 
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                                   notrot = 0_${ik}$
                 ! rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq ) / aapq
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                                      if( aapp>aaqq ) then
                                         call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( p ) / d( q )
                                         call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      else
                                         call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( q ) / d( p )
                                         call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q)
                 ! .. recompute sva(q)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_slassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )*d( n )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=sp)*tol ) .and.( real( n,KIND=sp)&
                        *mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:) reaching this point means that the procedure has completed the given
           ! number of iterations.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means that during the i-th sweep all pivots were
           ! below the given tolerance, causing early exit.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector d.
           do p = 1, n - 1
              q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 temp1 = d( p )
                 d( p ) = d( q )
                 d( q ) = temp1
                 call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgsvj0

     pure module subroutine stdlib${ii}$_dgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, &
     !! DGSVJ0 is called from DGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but
     !! it does not check convergence (stopping criterion). Few tuning
     !! parameters (marked by [TP]) are available for the implementer.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep
           real(dp), intent(in) :: eps, sfmin, tol
           character, intent(in) :: jobv
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*)
           real(dp), intent(out) :: work(lwork)
        ! =====================================================================
           
           ! Local Scalars 
           real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, &
                     lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Local Arrays 
           real(dp) :: fastr(5_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( lda<m ) then
              info = -5_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -8_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then
              info = -10_${ik}$
           else if( tol<=eps ) then
              info = -13_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -14_${ik}$
           else if( lwork<m ) then
              info = -16_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGSVJ0', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! -#- row-cyclic jacobi svd algorithm with column pivoting -#-
           emptsw = ( n*( n-1 ) ) / 2_${ik}$
           notrot = 0_${ik}$
           fastr( 1_${ik}$ ) = zero
           ! -#- row-cyclic pivot strategy with de rijk's pivoting -#-
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_sgesvj. for sweeps i=1:swband the procedure
           ! ......
           kbl = min( 8_${ik}$, n )
      ! [tp] kbl is a tuning parameter that defines the tile size in the
           ! tiling of the p-q loops of pivot pairs. in general, an optimal
           ! value of kbl depends on the matrix dimensions and on the
           ! parameters of the computer's memory.
           nbl = n / kbl
           if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           lkahead = 1_${ik}$
      ! [tp] lkahead is a tuning parameter.
           swband = 0_${ik}$
           pskipped = 0_${ik}$
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
              loop_2000: do ibr = 1, nbl
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_1002: do ir1 = 0, min( lkahead, nbl-ibr )
                    igl = igl + ir1*kbl
                    loop_2001: do p = igl, min( igl+kbl-1, n-1 )
           ! .. de rijk's pivoting
                       q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
                       if( p/=q ) then
                          call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                          if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                          temp1 = sva( p )
                          sva( p ) = sva( q )
                          sva( q ) = temp1
                          temp1 = d( p )
                          d( p ) = d( q )
                          d( q ) = temp1
                       end if
                       if( ir1==0_${ik}$ ) then
              ! column norms are periodically updated by explicit
              ! norm computation.
              ! caveat:
              ! some blas implementations compute stdlib${ii}$_dnrm2(m,a(1,p),1)
              ! as sqrt(stdlib${ii}$_ddot(m,a(1,p),1,a(1,p),1)), which may result in
              ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and
              ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold).
              ! hence, stdlib${ii}$_dnrm2 cannot be trusted, not even in the case when
              ! the true norm is far from the under(over)flow boundaries.
              ! if properly implemented stdlib${ii}$_dnrm2 is available, the if-then-else
              ! below should read "aapp = stdlib${ii}$_dnrm2( m, a(1,p), 1 ) * d(p)".
                          if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then
                             sva( p ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                          else
                             temp1 = zero
                             aapp = one
                             call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp )
                             sva( p ) = temp1*sqrt( aapp )*d( p )
                          end if
                          aapp = sva( p )
                       else
                          aapp = sva( p )
                       end if
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2002: do q = p + 1, min( igl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
                                if( aaqq>=one ) then
                                   rotok = ( small*aapp )<=aaqq
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   rotok = aapp<=( aaqq / small )
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                 ! Rotate
                 ! rotated = rotated + one
                                   if( ir1==0_${ik}$ ) then
                                      notrot = 0_${ik}$
                                      pskipped = 0_${ik}$
                                      iswrot = iswrot + 1_${ik}$
                                   end if
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                               if( rsvec ) then
                                                  call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                               if( rsvec ) then
                                                  call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda, &
                                                ierr )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      temp1 = -aapq*d( p ) / d( q )
                                      call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                      mxsinj = max( mxsinj, sfmin )
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! recompute sva(q), sva(p).
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                                else
              ! a(:,p) and a(:,q) already numerically orthogonal
                                   if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                   pskipped = pskipped + 1_${ik}$
                                end if
                             else
              ! a(:,q) is zero column
                                if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                if( ir1==0_${ik}$ )aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2103
                             end if
                          end do loop_2002
           ! end q-loop
           2103 continue
           ! bailed out of q-loop
                          sva( p ) = aapp
                       else
                          sva( p ) = aapp
                          if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, &
                                    n ) - p
                       end if
                    end do loop_2001
           ! end of the p-loop
           ! end of doing the block ( ibr, ibr )
                 end do loop_1002
           ! end of ir1-loop
      ! ........................................................
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = ibr + 1, nbl
                    jgl = ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! -#- m x 2 jacobi svd -#-
              ! -#- safe gram matrix computation -#-
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                                   notrot = 0_${ik}$
                 ! rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                                      if( aapp>aaqq ) then
                                         call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( p ) / d( q )
                                         call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      else
                                         call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( q ) / d( p )
                                         call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q)
                 ! .. recompute sva(q)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_dlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )*d( n )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=dp)*tol ) .and.( real( n,KIND=dp)&
                        *mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:) reaching this point means that the procedure has completed the given
           ! number of iterations.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means that during the i-th sweep all pivots were
           ! below the given tolerance, causing early exit.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector d.
           do p = 1, n - 1
              q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 temp1 = d( p )
                 d( p ) = d( q )
                 d( q ) = temp1
                 call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgsvj0

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, &
     !! DGSVJ0: is called from DGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but
     !! it does not check convergence (stopping criterion). Few tuning
     !! parameters (marked by [TP]) are available for the implementer.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep
           real(${rk}$), intent(in) :: eps, sfmin, tol
           character, intent(in) :: jobv
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*)
           real(${rk}$), intent(out) :: work(lwork)
        ! =====================================================================
           
           ! Local Scalars 
           real(${rk}$) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, &
                     lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Local Arrays 
           real(${rk}$) :: fastr(5_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( lda<m ) then
              info = -5_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -8_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then
              info = -10_${ik}$
           else if( tol<=eps ) then
              info = -13_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -14_${ik}$
           else if( lwork<m ) then
              info = -16_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGSVJ0', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! -#- row-cyclic jacobi svd algorithm with column pivoting -#-
           emptsw = ( n*( n-1 ) ) / 2_${ik}$
           notrot = 0_${ik}$
           fastr( 1_${ik}$ ) = zero
           ! -#- row-cyclic pivot strategy with de rijk's pivoting -#-
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_dgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_dgesvj. for sweeps i=1:swband the procedure
           ! ......
           kbl = min( 8_${ik}$, n )
      ! [tp] kbl is a tuning parameter that defines the tile size in the
           ! tiling of the p-q loops of pivot pairs. in general, an optimal
           ! value of kbl depends on the matrix dimensions and on the
           ! parameters of the computer's memory.
           nbl = n / kbl
           if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           lkahead = 1_${ik}$
      ! [tp] lkahead is a tuning parameter.
           swband = 0_${ik}$
           pskipped = 0_${ik}$
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
              loop_2000: do ibr = 1, nbl
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_1002: do ir1 = 0, min( lkahead, nbl-ibr )
                    igl = igl + ir1*kbl
                    loop_2001: do p = igl, min( igl+kbl-1, n-1 )
           ! .. de rijk's pivoting
                       q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
                       if( p/=q ) then
                          call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                          if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                          temp1 = sva( p )
                          sva( p ) = sva( q )
                          sva( q ) = temp1
                          temp1 = d( p )
                          d( p ) = d( q )
                          d( q ) = temp1
                       end if
                       if( ir1==0_${ik}$ ) then
              ! column norms are periodically updated by explicit
              ! norm computation.
              ! caveat:
              ! some blas implementations compute stdlib${ii}$_${ri}$nrm2(m,a(1,p),1)
              ! as sqrt(stdlib${ii}$_${ri}$dot(m,a(1,p),1,a(1,p),1)), which may result in
              ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and
              ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold).
              ! hence, stdlib${ii}$_${ri}$nrm2 cannot be trusted, not even in the case when
              ! the true norm is far from the under(over)flow boundaries.
              ! if properly implemented stdlib${ii}$_${ri}$nrm2 is available, the if-then-else
              ! below should read "aapp = stdlib${ii}$_${ri}$nrm2( m, a(1,p), 1 ) * d(p)".
                          if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then
                             sva( p ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                          else
                             temp1 = zero
                             aapp = one
                             call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp )
                             sva( p ) = temp1*sqrt( aapp )*d( p )
                          end if
                          aapp = sva( p )
                       else
                          aapp = sva( p )
                       end if
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2002: do q = p + 1, min( igl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
                                if( aaqq>=one ) then
                                   rotok = ( small*aapp )<=aaqq
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   rotok = aapp<=( aaqq / small )
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                 ! Rotate
                 ! rotated = rotated + one
                                   if( ir1==0_${ik}$ ) then
                                      notrot = 0_${ik}$
                                      pskipped = 0_${ik}$
                                      iswrot = iswrot + 1_${ik}$
                                   end if
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                               if( rsvec ) then
                                                  call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                               if( rsvec ) then
                                                  call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda, &
                                                ierr )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      temp1 = -aapq*d( p ) / d( q )
                                      call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                      mxsinj = max( mxsinj, sfmin )
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! recompute sva(q), sva(p).
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                                else
              ! a(:,p) and a(:,q) already numerically orthogonal
                                   if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                   pskipped = pskipped + 1_${ik}$
                                end if
                             else
              ! a(:,q) is zero column
                                if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                if( ir1==0_${ik}$ )aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2103
                             end if
                          end do loop_2002
           ! end q-loop
           2103 continue
           ! bailed out of q-loop
                          sva( p ) = aapp
                       else
                          sva( p ) = aapp
                          if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, &
                                    n ) - p
                       end if
                    end do loop_2001
           ! end of the p-loop
           ! end of doing the block ( ibr, ibr )
                 end do loop_1002
           ! end of ir1-loop
      ! ........................................................
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = ibr + 1, nbl
                    jgl = ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! -#- m x 2 jacobi svd -#-
              ! -#- safe gram matrix computation -#-
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                                   notrot = 0_${ik}$
                 ! rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                                      if( aapp>aaqq ) then
                                         call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( p ) / d( q )
                                         call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      else
                                         call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( q ) / d( p )
                                         call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q)
                 ! .. recompute sva(q)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )*d( n )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=${rk}$)*tol ) .and.( real( n,KIND=${rk}$)&
                        *mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:) reaching this point means that the procedure has completed the given
           ! number of iterations.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means that during the i-th sweep all pivots were
           ! below the given tolerance, causing early exit.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector d.
           do p = 1, n - 1
              q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 temp1 = d( p )
                 d( p ) = d( q )
                 d( q ) = temp1
                 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$gsvj0

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, &
     !! CGSVJ0 is called from CGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as CGESVJ does, but
     !! it does not check convergence (stopping criterion). Few tuning
     !! parameters (marked by [TP]) are available for the implementer.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep
           real(sp), intent(in) :: eps, sfmin, tol
           character, intent(in) :: jobv
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), d(n), v(ldv,*)
           complex(sp), intent(out) :: work(lwork)
           real(sp), intent(inout) :: sva(n)
        ! =====================================================================
           
           
           ! Local Scalars 
           complex(sp) :: aapq, ompq
           real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, &
                     lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Intrinsic Functions 
           ! from lapack
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( lda<m ) then
              info = -5_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -8_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then
              info = -10_${ik}$
           else if( tol<=eps ) then
              info = -13_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -14_${ik}$
           else if( lwork<m ) then
              info = -16_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGSVJ0', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! .. row-cyclic jacobi svd algorithm with column pivoting ..
           emptsw = ( n*( n-1 ) ) / 2_${ik}$
           notrot = 0_${ik}$
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective
           ! if stdlib${ii}$_cgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_cgejsv. for sweeps i=1:swband the procedure
           ! works on pivots inside a band-like region around the diagonal.
           ! the boundaries are determined dynamically, based on the number of
           ! pivots above a threshold.
           kbl = min( 8_${ik}$, n )
      ! [tp] kbl is a tuning parameter that defines the tile size in the
           ! tiling of the p-q loops of pivot pairs. in general, an optimal
           ! value of kbl depends on the matrix dimensions and on the
           ! parameters of the computer's memory.
           nbl = n / kbl
           if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$
           blskip = kbl**2_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           lkahead = 1_${ik}$
      ! [tp] lkahead is a tuning parameter.
           ! quasi block transformations, using the lower (upper) triangular
           ! structure of the input matrix. the quasi-block-cycling usually
           ! invokes cubic convergence. big part of this cycle is done inside
           ! canonical subspaces of dimensions less than m.
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
           ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs
           ! 1 <= p < q <= n. this is the first step toward a blocked implementation
           ! of the rotations. new implementation, based on block transformations,
           ! is under development.
              loop_2000: do ibr = 1, nbl
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_1002: do ir1 = 0, min( lkahead, nbl-ibr )
                    igl = igl + ir1*kbl
                    loop_2001: do p = igl, min( igl+kbl-1, n-1 )
           ! .. de rijk's pivoting
                       q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
                       if( p/=q ) then
                          call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                          if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                          temp1 = sva( p )
                          sva( p ) = sva( q )
                          sva( q ) = temp1
                          aapq = d(p)
                          d(p) = d(q)
                          d(q) = aapq
                       end if
                       if( ir1==0_${ik}$ ) then
              ! column norms are periodically updated by explicit
              ! norm computation.
              ! caveat:
              ! unfortunately, some blas implementations compute sncrm2(m,a(1,p),1)
              ! as sqrt(s=stdlib${ii}$_cdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to
              ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to
              ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold).
              ! hence, stdlib${ii}$_scnrm2 cannot be trusted, not even in the case when
              ! the true norm is far from the under(over)flow boundaries.
              ! if properly implemented stdlib${ii}$_scnrm2 is available, the if-then-else-end if
              ! below should be replaced with "aapp = stdlib${ii}$_scnrm2( m, a(1,p), 1 )".
                          if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then
                             sva( p ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                          else
                             temp1 = zero
                             aapp = one
                             call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp )
                             sva( p ) = temp1*sqrt( aapp )
                          end if
                          aapp = sva( p )
                       else
                          aapp = sva( p )
                       end if
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2002: do q = p + 1, min( igl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
                                if( aaqq>=one ) then
                                   rotok = ( small*aapp )<=aaqq
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_cdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   rotok = aapp<=( aaqq / small )
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aapp ) / aaqq
                                   else
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg( cwork(p) ) * cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                 ! Rotate
      ! [rtd]      rotated = rotated + one
                                   if( ir1==0_${ik}$ ) then
                                      notrot = 0_${ik}$
                                      pskipped = 0_${ik}$
                                      iswrot = iswrot + 1_${ik}$
                                   end if
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq1
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if ( rsvec ) then
                                             call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if ( rsvec ) then
                                             call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                      else
                    ! .. have to use modified gram-schmidt like transformation
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda,&
                                                ierr )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      call stdlib${ii}$_caxpy( m, -aapq, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                      mxsinj = max( mxsinj, sfmin )
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! recompute sva(q), sva(p).
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                                else
              ! a(:,p) and a(:,q) already numerically orthogonal
                                   if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                end if
                             else
              ! a(:,q) is zero column
                                if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                if( ir1==0_${ik}$ )aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2103
                             end if
                          end do loop_2002
           ! end q-loop
           2103 continue
           ! bailed out of q-loop
                          sva( p ) = aapp
                       else
                          sva( p ) = aapp
                          if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, &
                                    n ) - p
                       end if
                    end do loop_2001
           ! end of the p-loop
           ! end of doing the block ( ibr, ibr )
                 end do loop_1002
           ! end of ir1-loop
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = ibr + 1, nbl
                    jgl = ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! safe gram matrix computation
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_cdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(&
                                                aaqq,aapp) )/ min(aaqq,aapp)
                                   else
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg(cwork(p))*cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                                   notrot = 0_${ik}$
      ! [rtd]      rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/ aapq1
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if( rsvec ) then
                                             call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if( rsvec ) then
                                             call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                    if( aapp>aaqq ) then
                                         call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_caxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    else
                                        call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_caxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ &
                                                   )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! .. recompute sva(q), sva(p)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$)
                                       else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_classq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=sp) )*tol ) .and. ( real( n,&
                        KIND=sp)*mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:( reaching this point means that the procedure has not converged.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means numerical convergence after the i-th
           ! sweep.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector sva() of column norms.
           do p = 1, n - 1
              q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 aapq = d( p )
                 d( p ) = d( q )
                 d( q ) = aapq
                 call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgsvj0

     pure module subroutine stdlib${ii}$_zgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, &
     !! ZGSVJ0 is called from ZGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but
     !! it does not check convergence (stopping criterion). Few tuning
     !! parameters (marked by [TP]) are available for the implementer.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep
           real(dp), intent(in) :: eps, sfmin, tol
           character, intent(in) :: jobv
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), d(n), v(ldv,*)
           complex(dp), intent(out) :: work(lwork)
           real(dp), intent(inout) :: sva(n)
        ! =====================================================================
           
           
           ! Local Scalars 
           complex(dp) :: aapq, ompq
           real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, &
                     lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Intrinsic Functions 
           ! from lapack
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( lda<m ) then
              info = -5_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -8_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then
              info = -10_${ik}$
           else if( tol<=eps ) then
              info = -13_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -14_${ik}$
           else if( lwork<m ) then
              info = -16_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGSVJ0', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! .. row-cyclic jacobi svd algorithm with column pivoting ..
           emptsw = ( n*( n-1 ) ) / 2_${ik}$
           notrot = 0_${ik}$
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective
           ! if stdlib${ii}$_zgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_zgejsv. for sweeps i=1:swband the procedure
           ! works on pivots inside a band-like region around the diagonal.
           ! the boundaries are determined dynamically, based on the number of
           ! pivots above a threshold.
           kbl = min( 8_${ik}$, n )
      ! [tp] kbl is a tuning parameter that defines the tile size in the
           ! tiling of the p-q loops of pivot pairs. in general, an optimal
           ! value of kbl depends on the matrix dimensions and on the
           ! parameters of the computer's memory.
           nbl = n / kbl
           if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$
           blskip = kbl**2_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           lkahead = 1_${ik}$
      ! [tp] lkahead is a tuning parameter.
           ! quasi block transformations, using the lower (upper) triangular
           ! structure of the input matrix. the quasi-block-cycling usually
           ! invokes cubic convergence. big part of this cycle is done inside
           ! canonical subspaces of dimensions less than m.
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
           ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs
           ! 1 <= p < q <= n. this is the first step toward a blocked implementation
           ! of the rotations. new implementation, based on block transformations,
           ! is under development.
              loop_2000: do ibr = 1, nbl
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_1002: do ir1 = 0, min( lkahead, nbl-ibr )
                    igl = igl + ir1*kbl
                    loop_2001: do p = igl, min( igl+kbl-1, n-1 )
           ! .. de rijk's pivoting
                       q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
                       if( p/=q ) then
                          call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                          if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                          temp1 = sva( p )
                          sva( p ) = sva( q )
                          sva( q ) = temp1
                          aapq = d(p)
                          d(p) = d(q)
                          d(q) = aapq
                       end if
                       if( ir1==0_${ik}$ ) then
              ! column norms are periodically updated by explicit
              ! norm computation.
              ! caveat:
              ! unfortunately, some blas implementations compute sncrm2(m,a(1,p),1)
              ! as sqrt(s=stdlib${ii}$_zdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to
              ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to
              ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold).
              ! hence, stdlib${ii}$_dznrm2 cannot be trusted, not even in the case when
              ! the true norm is far from the under(over)flow boundaries.
              ! if properly implemented stdlib${ii}$_dznrm2 is available, the if-then-else-end if
              ! below should be replaced with "aapp = stdlib${ii}$_dznrm2( m, a(1,p), 1 )".
                          if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then
                             sva( p ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                          else
                             temp1 = zero
                             aapp = one
                             call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp )
                             sva( p ) = temp1*sqrt( aapp )
                          end if
                          aapp = sva( p )
                       else
                          aapp = sva( p )
                       end if
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2002: do q = p + 1, min( igl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
                                if( aaqq>=one ) then
                                   rotok = ( small*aapp )<=aaqq
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_zdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   rotok = aapp<=( aaqq / small )
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aapp ) / aaqq
                                   else
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg( cwork(p) ) * cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                 ! Rotate
      ! [rtd]      rotated = rotated + one
                                   if( ir1==0_${ik}$ ) then
                                      notrot = 0_${ik}$
                                      pskipped = 0_${ik}$
                                      iswrot = iswrot + 1_${ik}$
                                   end if
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq1
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if ( rsvec ) then
                                             call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if ( rsvec ) then
                                             call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                      else
                    ! .. have to use modified gram-schmidt like transformation
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda,&
                                                ierr )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      call stdlib${ii}$_zaxpy( m, -aapq, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                      mxsinj = max( mxsinj, sfmin )
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! recompute sva(q), sva(p).
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                                else
              ! a(:,p) and a(:,q) already numerically orthogonal
                                   if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                end if
                             else
              ! a(:,q) is zero column
                                if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                if( ir1==0_${ik}$ )aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2103
                             end if
                          end do loop_2002
           ! end q-loop
           2103 continue
           ! bailed out of q-loop
                          sva( p ) = aapp
                       else
                          sva( p ) = aapp
                          if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, &
                                    n ) - p
                       end if
                    end do loop_2001
           ! end of the p-loop
           ! end of doing the block ( ibr, ibr )
                 end do loop_1002
           ! end of ir1-loop
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = ibr + 1, nbl
                    jgl = ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! safe gram matrix computation
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_zdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(&
                                                aaqq,aapp) )/ min(aaqq,aapp)
                                   else
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg(cwork(p))*cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                                   notrot = 0_${ik}$
      ! [rtd]      rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/ aapq1
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if( rsvec ) then
                                             call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if( rsvec ) then
                                             call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                    if( aapp>aaqq ) then
                                         call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_zaxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    else
                                        call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_zaxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ &
                                                   )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! .. recompute sva(q), sva(p)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$)
                                       else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_zlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=dp) )*tol ) .and. ( real( n,&
                        KIND=dp)*mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:( reaching this point means that the procedure has not converged.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means numerical convergence after the i-th
           ! sweep.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector sva() of column norms.
           do p = 1, n - 1
              q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 aapq = d( p )
                 d( p ) = d( q )
                 d( q ) = aapq
                 call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgsvj0

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, &
     !! ZGSVJ0: is called from ZGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but
     !! it does not check convergence (stopping criterion). Few tuning
     !! parameters (marked by [TP]) are available for the implementer.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep
           real(${ck}$), intent(in) :: eps, sfmin, tol
           character, intent(in) :: jobv
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), d(n), v(ldv,*)
           complex(${ck}$), intent(out) :: work(lwork)
           real(${ck}$), intent(inout) :: sva(n)
        ! =====================================================================
           
           
           ! Local Scalars 
           complex(${ck}$) :: aapq, ompq
           real(${ck}$) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, &
                     lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Intrinsic Functions 
           ! from lapack
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( lda<m ) then
              info = -5_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -8_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then
              info = -10_${ik}$
           else if( tol<=eps ) then
              info = -13_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -14_${ik}$
           else if( lwork<m ) then
              info = -16_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGSVJ0', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! .. row-cyclic jacobi svd algorithm with column pivoting ..
           emptsw = ( n*( n-1 ) ) / 2_${ik}$
           notrot = 0_${ik}$
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective
           ! if stdlib${ii}$_${ci}$gesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_${ci}$gejsv. for sweeps i=1:swband the procedure
           ! works on pivots inside a band-like region around the diagonal.
           ! the boundaries are determined dynamically, based on the number of
           ! pivots above a threshold.
           kbl = min( 8_${ik}$, n )
      ! [tp] kbl is a tuning parameter that defines the tile size in the
           ! tiling of the p-q loops of pivot pairs. in general, an optimal
           ! value of kbl depends on the matrix dimensions and on the
           ! parameters of the computer's memory.
           nbl = n / kbl
           if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$
           blskip = kbl**2_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           lkahead = 1_${ik}$
      ! [tp] lkahead is a tuning parameter.
           ! quasi block transformations, using the lower (upper) triangular
           ! structure of the input matrix. the quasi-block-cycling usually
           ! invokes cubic convergence. big part of this cycle is done inside
           ! canonical subspaces of dimensions less than m.
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
           ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs
           ! 1 <= p < q <= n. this is the first step toward a blocked implementation
           ! of the rotations. new implementation, based on block transformations,
           ! is under development.
              loop_2000: do ibr = 1, nbl
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_1002: do ir1 = 0, min( lkahead, nbl-ibr )
                    igl = igl + ir1*kbl
                    loop_2001: do p = igl, min( igl+kbl-1, n-1 )
           ! .. de rijk's pivoting
                       q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
                       if( p/=q ) then
                          call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                          if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                          temp1 = sva( p )
                          sva( p ) = sva( q )
                          sva( q ) = temp1
                          aapq = d(p)
                          d(p) = d(q)
                          d(q) = aapq
                       end if
                       if( ir1==0_${ik}$ ) then
              ! column norms are periodically updated by explicit
              ! norm computation.
              ! caveat:
              ! unfortunately, some blas implementations compute sncrm2(m,a(1,p),1)
              ! as sqrt(s=stdlib${ii}$_${ci}$dotc(m,a(1,p),1,a(1,p),1)), which may cause the result to
              ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to
              ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold).
              ! hence, stdlib${ii}$_${c2ri(ci)}$znrm2 cannot be trusted, not even in the case when
              ! the true norm is far from the under(over)flow boundaries.
              ! if properly implemented stdlib${ii}$_${c2ri(ci)}$znrm2 is available, the if-then-else-end if
              ! below should be replaced with "aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a(1,p), 1 )".
                          if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then
                             sva( p ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                          else
                             temp1 = zero
                             aapp = one
                             call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp )
                             sva( p ) = temp1*sqrt( aapp )
                          end if
                          aapp = sva( p )
                       else
                          aapp = sva( p )
                       end if
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2002: do q = p + 1, min( igl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
                                if( aaqq>=one ) then
                                   rotok = ( small*aapp )<=aaqq
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_${ci}$dotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   rotok = aapp<=( aaqq / small )
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aapp ) / aaqq
                                   else
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg( cwork(p) ) * cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                 ! Rotate
      ! [rtd]      rotated = rotated + one
                                   if( ir1==0_${ik}$ ) then
                                      notrot = 0_${ik}$
                                      pskipped = 0_${ik}$
                                      iswrot = iswrot + 1_${ik}$
                                   end if
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/aapq1
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if ( rsvec ) then
                                             call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if ( rsvec ) then
                                             call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                      else
                    ! .. have to use modified gram-schmidt like transformation
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda,&
                                                ierr )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      call stdlib${ii}$_${ci}$axpy( m, -aapq, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), &
                                                lda, ierr )
                                      sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                      mxsinj = max( mxsinj, sfmin )
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! recompute sva(q), sva(p).
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                                else
              ! a(:,p) and a(:,q) already numerically orthogonal
                                   if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                end if
                             else
              ! a(:,q) is zero column
                                if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                if( ir1==0_${ik}$ )aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2103
                             end if
                          end do loop_2002
           ! end q-loop
           2103 continue
           ! bailed out of q-loop
                          sva( p ) = aapp
                       else
                          sva( p ) = aapp
                          if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, &
                                    n ) - p
                       end if
                    end do loop_2001
           ! end of the p-loop
           ! end of doing the block ( ibr, ibr )
                 end do loop_1002
           ! end of ir1-loop
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = ibr + 1, nbl
                    jgl = ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! safe gram matrix computation
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_${ci}$dotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(&
                                                aaqq,aapp) )/ min(aaqq,aapp)
                                   else
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg(cwork(p))*cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                                   notrot = 0_${ik}$
      ! [rtd]      rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/ aapq1
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if( rsvec ) then
                                             call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if( rsvec ) then
                                             call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                    if( aapp>aaqq ) then
                                         call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_${ci}$axpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    else
                                        call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_${ci}$axpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ &
                                                   )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! .. recompute sva(q), sva(p)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$)
                                       else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=${ck}$) )*tol ) .and. ( real( n,&
                        KIND=${ck}$)*mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:( reaching this point means that the procedure has not converged.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means numerical convergence after the i-th
           ! sweep.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector sva() of column norms.
           do p = 1, n - 1
              q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 aapq = d( p )
                 d( p ) = d( q )
                 d( q ) = aapq
                 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$gsvj0

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, &
     !! SGSVJ1 is called from SGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as SGESVJ does, but
     !! it targets only particular pivots and it does not check convergence
     !! (stopping criterion). Few tuning parameters (marked by [TP]) are
     !! available for the implementer.
     !! Further Details
     !! ~~~~~~~~~~~~~~~
     !! SGSVJ1 applies few sweeps of Jacobi rotations in the column space of
     !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
     !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
     !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the
     !! [x]'s in the following scheme:
     !! | *  *  * [x] [x] [x]|
     !! | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
     !! | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! In terms of the columns of A, the first N1 columns are rotated 'against'
     !! the remaining N-N1 columns, trying to increase the angle between the
     !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
     !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter.
     !! The number of sweeps is given in NSWEEP and the orthogonality threshold
     !! is given in TOL.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(sp), intent(in) :: eps, sfmin, tol
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep
           character, intent(in) :: jobv
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*)
           real(sp), intent(out) :: work(lwork)
        ! =====================================================================
           
           ! Local Scalars 
           real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, &
           mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, &
                     thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, &
                     notrot, nblc, nblr, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Local Arrays 
           real(sp) :: fastr(5_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( n1<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<m ) then
              info = -6_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -9_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) )  ) then
              info = -11_${ik}$
           else if( tol<=eps ) then
              info = -14_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -15_${ik}$
           else if( lwork<m ) then
              info = -17_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGSVJ1', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           large = big / sqrt( real( m*n,KIND=sp) )
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! Initialize The Right Singular Vector Matrix 
           ! rsvec = stdlib_lsame( jobv, 'y' )
           emptsw = n1*( n-n1 )
           notrot = 0_${ik}$
           fastr( 1_${ik}$ ) = zero
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           kbl = min( 8_${ik}$, n )
           nblr = n1 / kbl
           if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$
           ! .. the tiling is nblr-by-nblc [tiles]
           nblc = ( n-n1 ) / kbl
           if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_sgesvj.
           ! | *   *   * [x] [x] [x]|
           ! | *   *   * [x] [x] [x]|    row-cycling in the nblr-by-nblc [x] blocks.
           ! | *   *   * [x] [x] [x]|    row-cyclic pivoting inside each [x] block.
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
              loop_2000: do ibr = 1, nblr
                 igl = ( ibr-1 )*kbl + 1_${ik}$
      ! ........................................................
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = 1, nblc
                    jgl = n1 + ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n1 )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! Safe Gram Matrix Computation 
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                                   notrot = 0_${ik}$
                 ! rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq ) / aapq
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                                      if( aapp>aaqq ) then
                                         call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( p ) / d( q )
                                         call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      else
                                         call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( q ) / d( p )
                                         call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q)
                 ! .. recompute sva(q)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
                 ! skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
            ! if ( notrot >= emptsw )  go to 2011
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
      ! **      if ( notrot >= emptsw )  go to 2011
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **   if ( notrot >= emptsw ) go to 1994
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_slassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )*d( n )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=sp)*tol ) .and.( real( n,KIND=sp)&
                        *mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:) reaching this point means that the procedure has completed the given
           ! number of sweeps.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means that during the i-th sweep all pivots were
           ! below the given threshold, causing early exit.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector d
           do p = 1, n - 1
              q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 temp1 = d( p )
                 d( p ) = d( q )
                 d( q ) = temp1
                 call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgsvj1

     pure module subroutine stdlib${ii}$_dgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, &
     !! DGSVJ1 is called from DGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but
     !! it targets only particular pivots and it does not check convergence
     !! (stopping criterion). Few tuning parameters (marked by [TP]) are
     !! available for the implementer.
     !! Further Details
     !! ~~~~~~~~~~~~~~~
     !! DGSVJ1 applies few sweeps of Jacobi rotations in the column space of
     !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
     !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
     !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the
     !! [x]'s in the following scheme:
     !! | *  *  * [x] [x] [x]|
     !! | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
     !! | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! In terms of the columns of A, the first N1 columns are rotated 'against'
     !! the remaining N-N1 columns, trying to increase the angle between the
     !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
     !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter.
     !! The number of sweeps is given in NSWEEP and the orthogonality threshold
     !! is given in TOL.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(dp), intent(in) :: eps, sfmin, tol
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep
           character, intent(in) :: jobv
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*)
           real(dp), intent(out) :: work(lwork)
        ! =====================================================================
           
           ! Local Scalars 
           real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, &
           mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, &
                     thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, &
                     notrot, nblc, nblr, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Local Arrays 
           real(dp) :: fastr(5_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( n1<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<m ) then
              info = -6_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -9_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) )  ) then
              info = -11_${ik}$
           else if( tol<=eps ) then
              info = -14_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -15_${ik}$
           else if( lwork<m ) then
              info = -17_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGSVJ1', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           large = big / sqrt( real( m*n,KIND=dp) )
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! Initialize The Right Singular Vector Matrix 
           ! rsvec = stdlib_lsame( jobv, 'y' )
           emptsw = n1*( n-n1 )
           notrot = 0_${ik}$
           fastr( 1_${ik}$ ) = zero
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           kbl = min( 8_${ik}$, n )
           nblr = n1 / kbl
           if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$
           ! .. the tiling is nblr-by-nblc [tiles]
           nblc = ( n-n1 ) / kbl
           if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_sgesvj.
           ! | *   *   * [x] [x] [x]|
           ! | *   *   * [x] [x] [x]|    row-cycling in the nblr-by-nblc [x] blocks.
           ! | *   *   * [x] [x] [x]|    row-cyclic pivoting inside each [x] block.
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
              loop_2000: do ibr = 1, nblr
                 igl = ( ibr-1 )*kbl + 1_${ik}$
      ! ........................................................
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = 1, nblc
                    jgl = n1 + ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n1 )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! Safe Gram Matrix Computation 
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                                   notrot = 0_${ik}$
                 ! rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs(aqoap-apoaq) / aapq
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                                      if( aapp>aaqq ) then
                                         call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( p ) / d( q )
                                         call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      else
                                         call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( q ) / d( p )
                                         call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q)
                 ! .. recompute sva(q)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
                 ! skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
            ! if ( notrot >= emptsw )  go to 2011
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
      ! **      if ( notrot >= emptsw )  go to 2011
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **   if ( notrot >= emptsw ) go to 1994
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_dlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )*d( n )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=dp)*tol ) .and.( real( n,KIND=dp)&
                        *mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:) reaching this point means that the procedure has completed the given
           ! number of sweeps.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means that during the i-th sweep all pivots were
           ! below the given threshold, causing early exit.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector d
           do p = 1, n - 1
              q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 temp1 = d( p )
                 d( p ) = d( q )
                 d( q ) = temp1
                 call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgsvj1

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, &
     !! DGSVJ1: is called from DGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but
     !! it targets only particular pivots and it does not check convergence
     !! (stopping criterion). Few tuning parameters (marked by [TP]) are
     !! available for the implementer.
     !! Further Details
     !! ~~~~~~~~~~~~~~~
     !! DGSVJ1 applies few sweeps of Jacobi rotations in the column space of
     !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
     !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
     !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the
     !! [x]'s in the following scheme:
     !! | *  *  * [x] [x] [x]|
     !! | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
     !! | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! In terms of the columns of A, the first N1 columns are rotated 'against'
     !! the remaining N-N1 columns, trying to increase the angle between the
     !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
     !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter.
     !! The number of sweeps is given in NSWEEP and the orthogonality threshold
     !! is given in TOL.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(${rk}$), intent(in) :: eps, sfmin, tol
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep
           character, intent(in) :: jobv
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*)
           real(${rk}$), intent(out) :: work(lwork)
        ! =====================================================================
           
           ! Local Scalars 
           real(${rk}$) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, &
           mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, &
                     thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, &
                     notrot, nblc, nblr, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Local Arrays 
           real(${rk}$) :: fastr(5_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( n1<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<m ) then
              info = -6_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -9_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) )  ) then
              info = -11_${ik}$
           else if( tol<=eps ) then
              info = -14_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -15_${ik}$
           else if( lwork<m ) then
              info = -17_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGSVJ1', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           large = big / sqrt( real( m*n,KIND=${rk}$) )
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! Initialize The Right Singular Vector Matrix 
           ! rsvec = stdlib_lsame( jobv, 'y' )
           emptsw = n1*( n-n1 )
           notrot = 0_${ik}$
           fastr( 1_${ik}$ ) = zero
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           kbl = min( 8_${ik}$, n )
           nblr = n1 / kbl
           if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$
           ! .. the tiling is nblr-by-nblc [tiles]
           nblc = ( n-n1 ) / kbl
           if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_dgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_dgesvj.
           ! | *   *   * [x] [x] [x]|
           ! | *   *   * [x] [x] [x]|    row-cycling in the nblr-by-nblc [x] blocks.
           ! | *   *   * [x] [x] [x]|    row-cyclic pivoting inside each [x] block.
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
              loop_2000: do ibr = 1, nblr
                 igl = ( ibr-1 )*kbl + 1_${ik}$
      ! ........................................................
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                 loop_2010: do jbc = 1, nblc
                    jgl = n1 + ( jbc-1 )*kbl + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n1 )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! Safe Gram Matrix Computation 
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )&
                                                *d( q ) / aaqq )/ aapp
                                   else
                                      call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ )
                                      call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,&
                                                 ierr )
                                      aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / &
                                                aapp
                                   end if
                                end if
                                mxaapq = max( mxaapq, abs( aapq ) )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq )>tol ) then
                                   notrot = 0_${ik}$
                 ! rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs(aqoap-apoaq) / aapq
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t = half / theta
                                         fastr( 3_${ik}$ ) = t*d( p ) / d( q )
                                         fastr( 4_${ik}$ ) = -t*d( q ) / d( p )
                                         call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr )
                                                   
                                         if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),&
                                                    1_${ik}$,fastr )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) )
                                         apoaq = d( p ) / d( q )
                                         aqoap = d( q ) / d( p )
                                         if( d( p )>=one ) then
                                            if( d( q )>=one ) then
                                               fastr( 3_${ik}$ ) = t*apoaq
                                               fastr( 4_${ik}$ ) = -t*aqoap
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q )*cs
                                               call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,&
                                                         fastr )
                                               if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( &
                                                         1_${ik}$, q ),1_${ik}$, fastr )
                                            else
                                               call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, &
                                                         p ), 1_${ik}$ )
                                               call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( &
                                                         1_${ik}$, q ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(&
                                                             1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,&
                                                            v( 1_${ik}$, q ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p )*cs
                                               d( q ) = d( q ) / cs
                                            end if
                                         else
                                            if( d( q )>=one ) then
                                               call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q &
                                                         ), 1_${ik}$ )
                                               call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                         1_${ik}$, p ), 1_${ik}$ )
                                               if( rsvec ) then
                                                  call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( &
                                                            1_${ik}$, q ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), &
                                                            1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                               end if
                                               d( p ) = d( p ) / cs
                                               d( q ) = d( q )*cs
                                            else
                                               if( d( p )>=d( q ) ) then
                                                  call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( &
                                                            1_${ik}$, p ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,&
                                                            a( 1_${ik}$, q ), 1_${ik}$ )
                                                  d( p ) = d( p )*cs
                                                  d( q ) = d( q ) / cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,&
                                                               v( 1_${ik}$, p ), 1_${ik}$ )
                                                     call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),&
                                                                1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ )
                                                  end if
                                               else
                                                  call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,&
                                                             q ), 1_${ik}$ )
                                                  call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,&
                                                            a( 1_${ik}$, p ), 1_${ik}$ )
                                                  d( p ) = d( p ) / cs
                                                  d( q ) = d( q )*cs
                                                  if( rsvec ) then
                                                     call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, &
                                                               v( 1_${ik}$, q ), 1_${ik}$ )
                                                     call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )&
                                                               , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ )
                                                  end if
                                               end if
                                            end if
                                         end if
                                      end if
                                   else
                                      if( aapp>aaqq ) then
                                         call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( p ) / d( q )
                                         call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      else
                                         call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,&
                                                    ierr )
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         temp1 = -aapq*d( q ) / d( p )
                                         call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) )
                                         mxsinj = max( mxsinj, sfmin )
                                      end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q)
                 ! .. recompute sva(q)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q )
                                      else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )*d( q )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )*d( p )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
                 ! skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
            ! if ( notrot >= emptsw )  go to 2011
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
      ! **      if ( notrot >= emptsw )  go to 2011
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **   if ( notrot >= emptsw ) go to 1994
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )*d( n )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=${rk}$)*tol ) .and.( real( n,KIND=${rk}$)&
                        *mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:) reaching this point means that the procedure has completed the given
           ! number of sweeps.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means that during the i-th sweep all pivots were
           ! below the given threshold, causing early exit.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector d
           do p = 1, n - 1
              q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 temp1 = d( p )
                 d( p ) = d( q )
                 d( q ) = temp1
                 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$gsvj1

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, &
     !! CGSVJ1 is called from CGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as CGESVJ does, but
     !! it targets only particular pivots and it does not check convergence
     !! (stopping criterion). Few tuning parameters (marked by [TP]) are
     !! available for the implementer.
     !! Further Details
     !! ~~~~~~~~~~~~~~~
     !! CGSVJ1 applies few sweeps of Jacobi rotations in the column space of
     !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
     !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
     !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the
     !! [x]'s in the following scheme:
     !! | *  *  * [x] [x] [x]|
     !! | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
     !! | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! In terms of the columns of A, the first N1 columns are rotated 'against'
     !! the remaining N-N1 columns, trying to increase the angle between the
     !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
     !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter.
     !! The number of sweeps is given in NSWEEP and the orthogonality threshold
     !! is given in TOL.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(sp), intent(in) :: eps, sfmin, tol
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep
           character, intent(in) :: jobv
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), d(n), v(ldv,*)
           complex(sp), intent(out) :: work(lwork)
           real(sp), intent(inout) :: sva(n)
        ! =====================================================================
           
           ! Local Scalars 
           complex(sp) :: aapq, ompq
           real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, &
                     notrot, nblc, nblr, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Intrinsic Functions 
           ! From Lapack
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( n1<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<m ) then
              info = -6_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -9_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) )  ) then
              info = -11_${ik}$
           else if( tol<=eps ) then
              info = -14_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -15_${ik}$
           else if( lwork<m ) then
              info = -17_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGSVJ1', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           ! large = big / sqrt( real( m*n,KIND=sp) )
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! Initialize The Right Singular Vector Matrix 
           ! rsvec = stdlib_lsame( jobv, 'y' )
           emptsw = n1*( n-n1 )
           notrot = 0_${ik}$
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           kbl = min( 8_${ik}$, n )
           nblr = n1 / kbl
           if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$
           ! .. the tiling is nblr-by-nblc [tiles]
           nblc = ( n-n1 ) / kbl
           if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_cgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_cgejsv.
           ! | *   *   * [x] [x] [x]|
           ! | *   *   * [x] [x] [x]|    row-cycling in the nblr-by-nblc [x] blocks.
           ! | *   *   * [x] [x] [x]|    row-cyclic pivoting inside each [x] block.
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
           ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs
           ! 1 <= p < q <= n. this is the first step toward a blocked implementation
           ! of the rotations. new implementation, based on block transformations,
           ! is under development.
              loop_2000: do ibr = 1, nblr
                 igl = ( ibr-1 )*kbl + 1_${ik}$
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                  ! do 2010 jbc = ibr + 1, nbl
                 loop_2010: do jbc = 1, nblc
                    jgl = ( jbc-1 )*kbl + n1 + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n1 )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! safe gram matrix computation
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_cdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(&
                                                aaqq,aapp) )/ min(aaqq,aapp)
                                   else
                                      call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg(cwork(p))*cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                                   notrot = 0_${ik}$
      ! [rtd]      rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/ aapq1
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if( rsvec ) then
                                             call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if( rsvec ) then
                                             call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                    if( aapp>aaqq ) then
                                         call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_caxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    else
                                        call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_caxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ &
                                                   )
                                         call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! .. recompute sva(q), sva(p)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$)
                                       else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_classq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=sp) )*tol ) .and. ( real( n,&
                        KIND=sp)*mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:( reaching this point means that the procedure has not converged.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means numerical convergence after the i-th
           ! sweep.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector sva() of column norms.
           do p = 1, n - 1
              q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 aapq = d( p )
                 d( p ) = d( q )
                 d( q ) = aapq
                 call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgsvj1

     pure module subroutine stdlib${ii}$_zgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, &
     !! ZGSVJ1 is called from ZGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but
     !! it targets only particular pivots and it does not check convergence
     !! (stopping criterion). Few tuning parameters (marked by [TP]) are
     !! available for the implementer.
     !! Further Details
     !! ~~~~~~~~~~~~~~~
     !! ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of
     !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
     !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
     !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the
     !! [x]'s in the following scheme:
     !! | *  *  * [x] [x] [x]|
     !! | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
     !! | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! In terms of the columns of A, the first N1 columns are rotated 'against'
     !! the remaining N-N1 columns, trying to increase the angle between the
     !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
     !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter.
     !! The number of sweeps is given in NSWEEP and the orthogonality threshold
     !! is given in TOL.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(dp), intent(in) :: eps, sfmin, tol
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep
           character, intent(in) :: jobv
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), d(n), v(ldv,*)
           complex(dp), intent(out) :: work(lwork)
           real(dp), intent(inout) :: sva(n)
        ! =====================================================================
           
           ! Local Scalars 
           complex(dp) :: aapq, ompq
           real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, &
                     notrot, nblc, nblr, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Intrinsic Functions 
           ! From Lapack
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( n1<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<m ) then
              info = -6_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -9_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) )  ) then
              info = -11_${ik}$
           else if( tol<=eps ) then
              info = -14_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -15_${ik}$
           else if( lwork<m ) then
              info = -17_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGSVJ1', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           ! large = big / sqrt( real( m*n,KIND=dp) )
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! Initialize The Right Singular Vector Matrix 
           ! rsvec = stdlib_lsame( jobv, 'y' )
           emptsw = n1*( n-n1 )
           notrot = 0_${ik}$
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           kbl = min( 8_${ik}$, n )
           nblr = n1 / kbl
           if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$
           ! .. the tiling is nblr-by-nblc [tiles]
           nblc = ( n-n1 ) / kbl
           if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_zgesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_zgejsv.
           ! | *   *   * [x] [x] [x]|
           ! | *   *   * [x] [x] [x]|    row-cycling in the nblr-by-nblc [x] blocks.
           ! | *   *   * [x] [x] [x]|    row-cyclic pivoting inside each [x] block.
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
           ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs
           ! 1 <= p < q <= n. this is the first step toward a blocked implementation
           ! of the rotations. new implementation, based on block transformations,
           ! is under development.
              loop_2000: do ibr = 1, nblr
                 igl = ( ibr-1 )*kbl + 1_${ik}$
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                  ! do 2010 jbc = ibr + 1, nbl
                 loop_2010: do jbc = 1, nblc
                    jgl = ( jbc-1 )*kbl + n1 + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n1 )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! safe gram matrix computation
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_zdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(&
                                                aaqq,aapp) )/ min(aaqq,aapp)
                                   else
                                      call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg(cwork(p))*cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                                   notrot = 0_${ik}$
      ! [rtd]      rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/ aapq1
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if( rsvec ) then
                                             call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if( rsvec ) then
                                             call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                    if( aapp>aaqq ) then
                                         call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_zaxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    else
                                        call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_zaxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ &
                                                   )
                                         call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! .. recompute sva(q), sva(p)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$)
                                       else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_zlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=dp) )*tol ) .and. ( real( n,&
                        KIND=dp)*mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:( reaching this point means that the procedure has not converged.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means numerical convergence after the i-th
           ! sweep.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector sva() of column norms.
           do p = 1, n - 1
              q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 aapq = d( p )
                 d( p ) = d( q )
                 d( q ) = aapq
                 call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgsvj1

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, &
     !! ZGSVJ1: is called from ZGESVJ as a pre-processor and that is its main
     !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but
     !! it targets only particular pivots and it does not check convergence
     !! (stopping criterion). Few tuning parameters (marked by [TP]) are
     !! available for the implementer.
     !! Further Details
     !! ~~~~~~~~~~~~~~~
     !! ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of
     !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2)
     !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The
     !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the
     !! [x]'s in the following scheme:
     !! | *  *  * [x] [x] [x]|
     !! | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
     !! | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! |[x] [x] [x] *  *  * |
     !! In terms of the columns of A, the first N1 columns are rotated 'against'
     !! the remaining N-N1 columns, trying to increase the angle between the
     !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
     !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter.
     !! The number of sweeps is given in NSWEEP and the orthogonality threshold
     !! is given in TOL.
               nsweep, work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(${ck}$), intent(in) :: eps, sfmin, tol
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep
           character, intent(in) :: jobv
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), d(n), v(ldv,*)
           complex(${ck}$), intent(out) :: work(lwork)
           real(${ck}$), intent(inout) :: sva(n)
        ! =====================================================================
           
           ! Local Scalars 
           complex(${ck}$) :: aapq, ompq
           real(${ck}$) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, &
                     rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign
           integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, &
                     notrot, nblc, nblr, p, pskipped, q, rowskip, swband
           logical(lk) :: applv, rotok, rsvec
           ! Intrinsic Functions 
           ! From Lapack
           ! Executable Statements 
           ! test the input parameters.
           applv = stdlib_lsame( jobv, 'A' )
           rsvec = stdlib_lsame( jobv, 'V' )
           if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then
              info = -3_${ik}$
           else if( n1<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<m ) then
              info = -6_${ik}$
           else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then
              info = -9_${ik}$
           else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) )  ) then
              info = -11_${ik}$
           else if( tol<=eps ) then
              info = -14_${ik}$
           else if( nsweep<0_${ik}$ ) then
              info = -15_${ik}$
           else if( lwork<m ) then
              info = -17_${ik}$
           else
              info = 0_${ik}$
           end if
           ! #:(
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGSVJ1', -info )
              return
           end if
           if( rsvec ) then
              mvl = n
           else if( applv ) then
              mvl = mv
           end if
           rsvec = rsvec .or. applv
           rooteps = sqrt( eps )
           rootsfmin = sqrt( sfmin )
           small = sfmin / eps
           big = one / sfmin
           rootbig = one / rootsfmin
           ! large = big / sqrt( real( m*n,KIND=${ck}$) )
           bigtheta = one / rooteps
           roottol = sqrt( tol )
           ! Initialize The Right Singular Vector Matrix 
           ! rsvec = stdlib_lsame( jobv, 'y' )
           emptsw = n1*( n-n1 )
           notrot = 0_${ik}$
           ! .. row-cyclic pivot strategy with de rijk's pivoting ..
           kbl = min( 8_${ik}$, n )
           nblr = n1 / kbl
           if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$
           ! .. the tiling is nblr-by-nblc [tiles]
           nblc = ( n-n1 ) / kbl
           if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$
           blskip = ( kbl**2_${ik}$ ) + 1_${ik}$
      ! [tp] blkskip is a tuning parameter that depends on swband and kbl.
           rowskip = min( 5_${ik}$, kbl )
      ! [tp] rowskip is a tuning parameter.
           swband = 0_${ik}$
      ! [tp] swband is a tuning parameter. it is meaningful and effective
           ! if stdlib${ii}$_${ci}$gesvj is used as a computational routine in the preconditioned
           ! jacobi svd algorithm stdlib${ii}$_${ci}$gejsv.
           ! | *   *   * [x] [x] [x]|
           ! | *   *   * [x] [x] [x]|    row-cycling in the nblr-by-nblc [x] blocks.
           ! | *   *   * [x] [x] [x]|    row-cyclic pivoting inside each [x] block.
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           ! |[x] [x] [x] *   *   * |
           loop_1993: do i = 1, nsweep
           ! .. go go go ...
              mxaapq = zero
              mxsinj = zero
              iswrot = 0_${ik}$
              notrot = 0_${ik}$
              pskipped = 0_${ik}$
           ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs
           ! 1 <= p < q <= n. this is the first step toward a blocked implementation
           ! of the rotations. new implementation, based on block transformations,
           ! is under development.
              loop_2000: do ibr = 1, nblr
                 igl = ( ibr-1 )*kbl + 1_${ik}$
       ! ... go to the off diagonal blocks
                 igl = ( ibr-1 )*kbl + 1_${ik}$
                  ! do 2010 jbc = ibr + 1, nbl
                 loop_2010: do jbc = 1, nblc
                    jgl = ( jbc-1 )*kbl + n1 + 1_${ik}$
              ! doing the block at ( ibr, jbc )
                    ijblsk = 0_${ik}$
                    loop_2100: do p = igl, min( igl+kbl-1, n1 )
                       aapp = sva( p )
                       if( aapp>zero ) then
                          pskipped = 0_${ik}$
                          loop_2200: do q = jgl, min( jgl+kbl-1, n )
                             aaqq = sva( q )
                             if( aaqq>zero ) then
                                aapp0 = aapp
           ! M X 2 Jacobi Svd 
              ! safe gram matrix computation
                                if( aaqq>=one ) then
                                   if( aapp>=aaqq ) then
                                      rotok = ( small*aapp )<=aaqq
                                   else
                                      rotok = ( small*aaqq )<=aapp
                                   end if
                                   if( aapp<( big / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq ) / aapp
                                   else
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_${ci}$dotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / &
                                                aaqq
                                   end if
                                else
                                   if( aapp>=aaqq ) then
                                      rotok = aapp<=( aaqq / small )
                                   else
                                      rotok = aaqq<=( aapp / small )
                                   end if
                                   if( aapp>( small / aaqq ) ) then
                                      aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(&
                                                aaqq,aapp) )/ min(aaqq,aapp)
                                   else
                                      call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                      call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, &
                                                ierr )
                                      aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / &
                                                aapp
                                   end if
                                end if
                                 ! aapq = aapq * conjg(cwork(p))*cwork(q)
                                aapq1  = -abs(aapq)
                                mxaapq = max( mxaapq, -aapq1 )
              ! to rotate or not to rotate, that is the question ...
                                if( abs( aapq1 )>tol ) then
                                   ompq = aapq / abs(aapq)
                                   notrot = 0_${ik}$
      ! [rtd]      rotated  = rotated + 1
                                   pskipped = 0_${ik}$
                                   iswrot = iswrot + 1_${ik}$
                                   if( rotok ) then
                                      aqoap = aaqq / aapp
                                      apoaq = aapp / aaqq
                                      theta = -half*abs( aqoap-apoaq )/ aapq1
                                      if( aaqq>aapp0 )theta = -theta
                                      if( abs( theta )>bigtheta ) then
                                         t  = half / theta
                                         cs = one
                                         call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *t )
                                         if( rsvec ) then
                                             call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*t )
                                         end if
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         mxsinj = max( mxsinj, abs( t ) )
                                      else
                       ! Choose Correct Signum For Theta And Rotate
                                         thsign = -sign( one, aapq1 )
                                         if( aaqq>aapp0 )thsign = -thsign
                                         t = one / ( theta+thsign*sqrt( one+theta*theta ) )
                                                   
                                         cs = sqrt( one / ( one+t*t ) )
                                         sn = t*cs
                                         mxsinj = max( mxsinj, abs( sn ) )
                                         sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) )
                                                   
                                         aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) )
                                         call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)&
                                                   *sn )
                                         if( rsvec ) then
                                             call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, &
                                                       conjg(ompq)*sn )
                                         end if
                                      end if
                                      d(p) = -d(q) * ompq
                                   else
                    ! .. have to use modified gram-schmidt like transformation
                                    if( aapp>aaqq ) then
                                         call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_${ci}$axpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                                                   
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),&
                                                    lda,ierr )
                                         sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    else
                                        call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,&
                                                   ierr )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         call stdlib${ii}$_${ci}$axpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ &
                                                   )
                                         call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),&
                                                    lda,ierr )
                                         sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) )
                                                   
                                         mxsinj = max( mxsinj, sfmin )
                                    end if
                                   end if
                 ! end if rotok then ... else
                 ! in the case of cancellation in updating sva(q), sva(p)
                 ! .. recompute sva(q), sva(p)
                                   if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then
                                      if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then
                                         sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$)
                                       else
                                         t = zero
                                         aaqq = one
                                         call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq )
                                         sva( q ) = t*sqrt( aaqq )
                                      end if
                                   end if
                                   if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then
                                      if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then
                                         aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )
                                      else
                                         t = zero
                                         aapp = one
                                         call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp )
                                         aapp = t*sqrt( aapp )
                                      end if
                                      sva( p ) = aapp
                                   end if
                    ! end of ok rotation
                                else
                                   notrot = notrot + 1_${ik}$
      ! [rtd]      skipped  = skipped  + 1
                                   pskipped = pskipped + 1_${ik}$
                                   ijblsk = ijblsk + 1_${ik}$
                                end if
                             else
                                notrot = notrot + 1_${ik}$
                                pskipped = pskipped + 1_${ik}$
                                ijblsk = ijblsk + 1_${ik}$
                             end if
                             if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then
                                sva( p ) = aapp
                                notrot = 0_${ik}$
                                go to 2011
                             end if
                             if( ( i<=swband ) .and.( pskipped>rowskip ) ) then
                                aapp = -aapp
                                notrot = 0_${ik}$
                                go to 2203
                             end if
                          end do loop_2200
              ! end of the q-loop
              2203 continue
                          sva( p ) = aapp
                       else
                          if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$
                          if( aapp<zero )notrot = 0_${ik}$
                       end if
                    end do loop_2100
           ! end of the p-loop
                 end do loop_2010
           ! end of the jbc-loop
           2011 continue
      ! 2011 bailed out of the jbc-loop
                 do p = igl, min( igl+kbl-1, n )
                    sva( p ) = abs( sva( p ) )
                 end do
      ! **
              end do loop_2000
      ! 2000 :: end of the ibr-loop
           ! .. update sva(n)
              if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then
                 sva( n ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )
              else
                 t = zero
                 aapp = one
                 call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp )
                 sva( n ) = t*sqrt( aapp )
              end if
           ! additional steering devices
              if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i
              if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=${ck}$) )*tol ) .and. ( real( n,&
                        KIND=${ck}$)*mxaapq*mxsinj<tol ) ) then
                 go to 1994
              end if
              if( notrot>=emptsw )go to 1994
           end do loop_1993
           ! end i=1:nsweep loop
       ! #:( reaching this point means that the procedure has not converged.
           info = nsweep - 1_${ik}$
           go to 1995
           1994 continue
       ! #:) reaching this point means numerical convergence after the i-th
           ! sweep.
           info = 0_${ik}$
       ! #:) info = 0 confirms successful iterations.
       1995 continue
           ! sort the vector sva() of column norms.
           do p = 1, n - 1
              q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$
              if( p/=q ) then
                 temp1 = sva( p )
                 sva( p ) = sva( q )
                 sva( q ) = temp1
                 aapq = d( p )
                 d( p ) = d( q )
                 d( q ) = aapq
                 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ )
                 if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$gsvj1

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, &
     !! STGSJA computes the generalized singular value decomposition (GSVD)
     !! of two real upper triangular (or trapezoidal) matrices A and B.
     !! On entry, it is assumed that matrices A and B have the following
     !! forms, which may be obtained by the preprocessing subroutine SGGSVP
     !! from a general M-by-N matrix A and P-by-N matrix B:
     !! N-K-L  K    L
     !! A =    K ( 0    A12  A13 ) if M-K-L >= 0;
     !! L ( 0     0   A23 )
     !! M-K-L ( 0     0    0  )
     !! N-K-L  K    L
     !! A =  K ( 0    A12  A13 ) if M-K-L < 0;
     !! M-K ( 0     0   A23 )
     !! N-K-L  K    L
     !! B =  L ( 0     0   B13 )
     !! P-L ( 0     0    0  )
     !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
     !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
     !! otherwise A23 is (M-K)-by-L upper trapezoidal.
     !! On exit,
     !! U**T *A*Q = D1*( 0 R ),    V**T *B*Q = D2*( 0 R ),
     !! where U, V and Q are orthogonal matrices.
     !! R is a nonsingular upper triangular matrix, and D1 and D2 are
     !! ``diagonal'' matrices, which are of the following structures:
     !! If M-K-L >= 0,
     !! K  L
     !! D1 =     K ( I  0 )
     !! L ( 0  C )
     !! M-K-L ( 0  0 )
     !! K  L
     !! D2 = L   ( 0  S )
     !! P-L ( 0  0 )
     !! N-K-L  K    L
     !! ( 0 R ) = K (  0   R11  R12 ) K
     !! L (  0    0   R22 ) L
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
     !! S = diag( BETA(K+1),  ... , BETA(K+L) ),
     !! C**2 + S**2 = I.
     !! R is stored in A(1:K+L,N-K-L+1:N) on exit.
     !! If M-K-L < 0,
     !! K M-K K+L-M
     !! D1 =   K ( I  0    0   )
     !! M-K ( 0  C    0   )
     !! K M-K K+L-M
     !! D2 =   M-K ( 0  S    0   )
     !! K+L-M ( 0  0    I   )
     !! P-L ( 0  0    0   )
     !! N-K-L  K   M-K  K+L-M
     !! ( 0 R ) =    K ( 0    R11  R12  R13  )
     !! M-K ( 0     0   R22  R23  )
     !! K+L-M ( 0     0    0   R33  )
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(M) ),
     !! S = diag( BETA(K+1),  ... , BETA(M) ),
     !! C**2 + S**2 = I.
     !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
     !! (  0  R22 R23 )
     !! in B(M-K+1:L,N+M-K-L+1:N) on exit.
     !! The computation of the orthogonal transformation matrices U, V or Q
     !! is optional.  These matrices may either be formed explicitly, or they
     !! may be postmultiplied into input matrices U1, V1, or Q1.
               alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobq, jobu, jobv
           integer(${ik}$), intent(out) :: info, ncycle
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p
           real(sp), intent(in) :: tola, tolb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*)
           real(sp), intent(out) :: alpha(*), beta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 40_${ik}$
           real(sp), parameter :: hugenum = huge(zero)
           
           
           ! Local Scalars 
           logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv
           integer(${ik}$) :: i, j, kcycle
           real(sp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, &
                     ssmin
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           initu = stdlib_lsame( jobu, 'I' )
           wantu = initu .or. stdlib_lsame( jobu, 'U' )
           initv = stdlib_lsame( jobv, 'I' )
           wantv = initv .or. stdlib_lsame( jobv, 'V' )
           initq = stdlib_lsame( jobq, 'I' )
           wantq = initq .or. stdlib_lsame( jobq, 'Q' )
           info = 0_${ik}$
           if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then
              info = -1_${ik}$
           else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -2_${ik}$
           else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( p<0_${ik}$ ) then
              info = -5_${ik}$
           else if( n<0_${ik}$ ) then
              info = -6_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -12_${ik}$
           else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then
              info = -18_${ik}$
           else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then
              info = -20_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -22_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGSJA', -info )
              return
           end if
           ! initialize u, v and q, if necessary
           if( initu )call stdlib${ii}$_slaset( 'FULL', m, m, zero, one, u, ldu )
           if( initv )call stdlib${ii}$_slaset( 'FULL', p, p, zero, one, v, ldv )
           if( initq )call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, q, ldq )
           ! loop until convergence
           upper = .false.
           loop_40: do kcycle = 1, maxit
              upper = .not.upper
              loop_20: do i = 1, l - 1
                 loop_10: do j = i + 1, l
                    a1 = zero
                    a2 = zero
                    a3 = zero
                    if( k+i<=m )a1 = a( k+i, n-l+i )
                    if( k+j<=m )a3 = a( k+j, n-l+j )
                    b1 = b( i, n-l+i )
                    b3 = b( j, n-l+j )
                    if( upper ) then
                       if( k+i<=m )a2 = a( k+i, n-l+j )
                       b2 = b( i, n-l+j )
                    else
                       if( k+j<=m )a2 = a( k+j, n-l+i )
                       b2 = b( j, n-l+i )
                    end if
                    call stdlib${ii}$_slags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, &
                              snq )
                    ! update (k+i)-th and (k+j)-th rows of matrix a: u**t *a
                    if( k+j<=m )call stdlib${ii}$_srot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, &
                              csu, snu )
                    ! update i-th and j-th rows of matrix b: v**t *b
                    call stdlib${ii}$_srot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, snv )
                    ! update (n-l+i)-th and (n-l+j)-th columns of matrices
                    ! a and b: a*q and b*q
                    call stdlib${ii}$_srot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq )
                              
                    call stdlib${ii}$_srot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                    if( upper ) then
                       if( k+i<=m )a( k+i, n-l+j ) = zero
                       b( i, n-l+j ) = zero
                    else
                       if( k+j<=m )a( k+j, n-l+i ) = zero
                       b( j, n-l+i ) = zero
                    end if
                    ! update orthogonal matrices u, v, q, if desired.
                    if( wantu .and. k+j<=m )call stdlib${ii}$_srot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, &
                              csu,snu )
                    if( wantv )call stdlib${ii}$_srot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv )
                    if( wantq )call stdlib${ii}$_srot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                              
                 end do loop_10
              end do loop_20
              if( .not.upper ) then
                 ! the matrices a13 and b13 were lower triangular at the start
                 ! of the cycle, and are now upper triangular.
                 ! convergence test: test the parallelism of the corresponding
                 ! rows of a and b.
                 error = zero
                 do i = 1, min( l, m-k )
                    call stdlib${ii}$_scopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ )
                    call stdlib${ii}$_scopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ )
                    call stdlib${ii}$_slapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin )
                    error = max( error, ssmin )
                 end do
                 if( abs( error )<=min( tola, tolb ) )go to 50
              end if
              ! end of cycle loop
           end do loop_40
           ! the algorithm has not converged after maxit cycles.
           info = 1_${ik}$
           go to 100
           50 continue
           ! if error <= min(tola,tolb), then the algorithm has converged.
           ! compute the generalized singular value pairs (alpha, beta), and
           ! set the triangular matrix r to array a.
           do i = 1, k
              alpha( i ) = one
              beta( i ) = zero
           end do
           do i = 1, min( l, m-k )
              a1 = a( k+i, n-l+i )
              b1 = b( i, n-l+i )
              gamma = b1 / a1
              if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then
                 ! change sign if necessary
                 if( gamma<zero ) then
                    call stdlib${ii}$_sscal( l-i+1, -one, b( i, n-l+i ), ldb )
                    if( wantv )call stdlib${ii}$_sscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 call stdlib${ii}$_slartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk )
                 if( alpha( k+i )>=beta( k+i ) ) then
                    call stdlib${ii}$_sscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda )
                 else
                    call stdlib${ii}$_sscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb )
                    call stdlib${ii}$_scopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
                 end if
              else
                 alpha( k+i ) = zero
                 beta( k+i ) = one
                 call stdlib${ii}$_scopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
              end if
           end do
           ! post-assignment
           do i = m + 1, k + l
              alpha( i ) = zero
              beta( i ) = one
           end do
           if( k+l<n ) then
              do i = k + l + 1, n
                 alpha( i ) = zero
                 beta( i ) = zero
              end do
           end if
           100 continue
           ncycle = kcycle
           return
     end subroutine stdlib${ii}$_stgsja

     pure module subroutine stdlib${ii}$_dtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, &
     !! DTGSJA computes the generalized singular value decomposition (GSVD)
     !! of two real upper triangular (or trapezoidal) matrices A and B.
     !! On entry, it is assumed that matrices A and B have the following
     !! forms, which may be obtained by the preprocessing subroutine DGGSVP
     !! from a general M-by-N matrix A and P-by-N matrix B:
     !! N-K-L  K    L
     !! A =    K ( 0    A12  A13 ) if M-K-L >= 0;
     !! L ( 0     0   A23 )
     !! M-K-L ( 0     0    0  )
     !! N-K-L  K    L
     !! A =  K ( 0    A12  A13 ) if M-K-L < 0;
     !! M-K ( 0     0   A23 )
     !! N-K-L  K    L
     !! B =  L ( 0     0   B13 )
     !! P-L ( 0     0    0  )
     !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
     !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
     !! otherwise A23 is (M-K)-by-L upper trapezoidal.
     !! On exit,
     !! U**T *A*Q = D1*( 0 R ),    V**T *B*Q = D2*( 0 R ),
     !! where U, V and Q are orthogonal matrices.
     !! R is a nonsingular upper triangular matrix, and D1 and D2 are
     !! ``diagonal'' matrices, which are of the following structures:
     !! If M-K-L >= 0,
     !! K  L
     !! D1 =     K ( I  0 )
     !! L ( 0  C )
     !! M-K-L ( 0  0 )
     !! K  L
     !! D2 = L   ( 0  S )
     !! P-L ( 0  0 )
     !! N-K-L  K    L
     !! ( 0 R ) = K (  0   R11  R12 ) K
     !! L (  0    0   R22 ) L
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
     !! S = diag( BETA(K+1),  ... , BETA(K+L) ),
     !! C**2 + S**2 = I.
     !! R is stored in A(1:K+L,N-K-L+1:N) on exit.
     !! If M-K-L < 0,
     !! K M-K K+L-M
     !! D1 =   K ( I  0    0   )
     !! M-K ( 0  C    0   )
     !! K M-K K+L-M
     !! D2 =   M-K ( 0  S    0   )
     !! K+L-M ( 0  0    I   )
     !! P-L ( 0  0    0   )
     !! N-K-L  K   M-K  K+L-M
     !! ( 0 R ) =    K ( 0    R11  R12  R13  )
     !! M-K ( 0     0   R22  R23  )
     !! K+L-M ( 0     0    0   R33  )
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(M) ),
     !! S = diag( BETA(K+1),  ... , BETA(M) ),
     !! C**2 + S**2 = I.
     !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
     !! (  0  R22 R23 )
     !! in B(M-K+1:L,N+M-K-L+1:N) on exit.
     !! The computation of the orthogonal transformation matrices U, V or Q
     !! is optional.  These matrices may either be formed explicitly, or they
     !! may be postmultiplied into input matrices U1, V1, or Q1.
               alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobq, jobu, jobv
           integer(${ik}$), intent(out) :: info, ncycle
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p
           real(dp), intent(in) :: tola, tolb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*)
           real(dp), intent(out) :: alpha(*), beta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 40_${ik}$
           real(dp), parameter :: hugenum = huge(zero)
           
           
           ! Local Scalars 
           logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv
           integer(${ik}$) :: i, j, kcycle
           real(dp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, &
                     ssmin
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           initu = stdlib_lsame( jobu, 'I' )
           wantu = initu .or. stdlib_lsame( jobu, 'U' )
           initv = stdlib_lsame( jobv, 'I' )
           wantv = initv .or. stdlib_lsame( jobv, 'V' )
           initq = stdlib_lsame( jobq, 'I' )
           wantq = initq .or. stdlib_lsame( jobq, 'Q' )
           info = 0_${ik}$
           if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then
              info = -1_${ik}$
           else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -2_${ik}$
           else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( p<0_${ik}$ ) then
              info = -5_${ik}$
           else if( n<0_${ik}$ ) then
              info = -6_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -12_${ik}$
           else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then
              info = -18_${ik}$
           else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then
              info = -20_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -22_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSJA', -info )
              return
           end if
           ! initialize u, v and q, if necessary
           if( initu )call stdlib${ii}$_dlaset( 'FULL', m, m, zero, one, u, ldu )
           if( initv )call stdlib${ii}$_dlaset( 'FULL', p, p, zero, one, v, ldv )
           if( initq )call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, q, ldq )
           ! loop until convergence
           upper = .false.
           loop_40: do kcycle = 1, maxit
              upper = .not.upper
              loop_20: do i = 1, l - 1
                 loop_10: do j = i + 1, l
                    a1 = zero
                    a2 = zero
                    a3 = zero
                    if( k+i<=m )a1 = a( k+i, n-l+i )
                    if( k+j<=m )a3 = a( k+j, n-l+j )
                    b1 = b( i, n-l+i )
                    b3 = b( j, n-l+j )
                    if( upper ) then
                       if( k+i<=m )a2 = a( k+i, n-l+j )
                       b2 = b( i, n-l+j )
                    else
                       if( k+j<=m )a2 = a( k+j, n-l+i )
                       b2 = b( j, n-l+i )
                    end if
                    call stdlib${ii}$_dlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, &
                              snq )
                    ! update (k+i)-th and (k+j)-th rows of matrix a: u**t *a
                    if( k+j<=m )call stdlib${ii}$_drot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, &
                              csu, snu )
                    ! update i-th and j-th rows of matrix b: v**t *b
                    call stdlib${ii}$_drot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, snv )
                    ! update (n-l+i)-th and (n-l+j)-th columns of matrices
                    ! a and b: a*q and b*q
                    call stdlib${ii}$_drot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq )
                              
                    call stdlib${ii}$_drot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                    if( upper ) then
                       if( k+i<=m )a( k+i, n-l+j ) = zero
                       b( i, n-l+j ) = zero
                    else
                       if( k+j<=m )a( k+j, n-l+i ) = zero
                       b( j, n-l+i ) = zero
                    end if
                    ! update orthogonal matrices u, v, q, if desired.
                    if( wantu .and. k+j<=m )call stdlib${ii}$_drot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, &
                              csu,snu )
                    if( wantv )call stdlib${ii}$_drot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv )
                    if( wantq )call stdlib${ii}$_drot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                              
                 end do loop_10
              end do loop_20
              if( .not.upper ) then
                 ! the matrices a13 and b13 were lower triangular at the start
                 ! of the cycle, and are now upper triangular.
                 ! convergence test: test the parallelism of the corresponding
                 ! rows of a and b.
                 error = zero
                 do i = 1, min( l, m-k )
                    call stdlib${ii}$_dcopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ )
                    call stdlib${ii}$_dcopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ )
                    call stdlib${ii}$_dlapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin )
                    error = max( error, ssmin )
                 end do
                 if( abs( error )<=min( tola, tolb ) )go to 50
              end if
              ! end of cycle loop
           end do loop_40
           ! the algorithm has not converged after maxit cycles.
           info = 1_${ik}$
           go to 100
           50 continue
           ! if error <= min(tola,tolb), then the algorithm has converged.
           ! compute the generalized singular value pairs (alpha, beta), and
           ! set the triangular matrix r to array a.
           do i = 1, k
              alpha( i ) = one
              beta( i ) = zero
           end do
           do i = 1, min( l, m-k )
              a1 = a( k+i, n-l+i )
              b1 = b( i, n-l+i )
              gamma = b1 / a1
              if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then
                 ! change sign if necessary
                 if( gamma<zero ) then
                    call stdlib${ii}$_dscal( l-i+1, -one, b( i, n-l+i ), ldb )
                    if( wantv )call stdlib${ii}$_dscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 call stdlib${ii}$_dlartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk )
                 if( alpha( k+i )>=beta( k+i ) ) then
                    call stdlib${ii}$_dscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda )
                 else
                    call stdlib${ii}$_dscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb )
                    call stdlib${ii}$_dcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
                 end if
              else
                 alpha( k+i ) = zero
                 beta( k+i ) = one
                 call stdlib${ii}$_dcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
              end if
           end do
           ! post-assignment
           do i = m + 1, k + l
              alpha( i ) = zero
              beta( i ) = one
           end do
           if( k+l<n ) then
              do i = k + l + 1, n
                 alpha( i ) = zero
                 beta( i ) = zero
              end do
           end if
           100 continue
           ncycle = kcycle
           return
     end subroutine stdlib${ii}$_dtgsja

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, &
     !! DTGSJA: computes the generalized singular value decomposition (GSVD)
     !! of two real upper triangular (or trapezoidal) matrices A and B.
     !! On entry, it is assumed that matrices A and B have the following
     !! forms, which may be obtained by the preprocessing subroutine DGGSVP
     !! from a general M-by-N matrix A and P-by-N matrix B:
     !! N-K-L  K    L
     !! A =    K ( 0    A12  A13 ) if M-K-L >= 0;
     !! L ( 0     0   A23 )
     !! M-K-L ( 0     0    0  )
     !! N-K-L  K    L
     !! A =  K ( 0    A12  A13 ) if M-K-L < 0;
     !! M-K ( 0     0   A23 )
     !! N-K-L  K    L
     !! B =  L ( 0     0   B13 )
     !! P-L ( 0     0    0  )
     !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
     !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
     !! otherwise A23 is (M-K)-by-L upper trapezoidal.
     !! On exit,
     !! U**T *A*Q = D1*( 0 R ),    V**T *B*Q = D2*( 0 R ),
     !! where U, V and Q are orthogonal matrices.
     !! R is a nonsingular upper triangular matrix, and D1 and D2 are
     !! ``diagonal'' matrices, which are of the following structures:
     !! If M-K-L >= 0,
     !! K  L
     !! D1 =     K ( I  0 )
     !! L ( 0  C )
     !! M-K-L ( 0  0 )
     !! K  L
     !! D2 = L   ( 0  S )
     !! P-L ( 0  0 )
     !! N-K-L  K    L
     !! ( 0 R ) = K (  0   R11  R12 ) K
     !! L (  0    0   R22 ) L
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
     !! S = diag( BETA(K+1),  ... , BETA(K+L) ),
     !! C**2 + S**2 = I.
     !! R is stored in A(1:K+L,N-K-L+1:N) on exit.
     !! If M-K-L < 0,
     !! K M-K K+L-M
     !! D1 =   K ( I  0    0   )
     !! M-K ( 0  C    0   )
     !! K M-K K+L-M
     !! D2 =   M-K ( 0  S    0   )
     !! K+L-M ( 0  0    I   )
     !! P-L ( 0  0    0   )
     !! N-K-L  K   M-K  K+L-M
     !! ( 0 R ) =    K ( 0    R11  R12  R13  )
     !! M-K ( 0     0   R22  R23  )
     !! K+L-M ( 0     0    0   R33  )
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(M) ),
     !! S = diag( BETA(K+1),  ... , BETA(M) ),
     !! C**2 + S**2 = I.
     !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
     !! (  0  R22 R23 )
     !! in B(M-K+1:L,N+M-K-L+1:N) on exit.
     !! The computation of the orthogonal transformation matrices U, V or Q
     !! is optional.  These matrices may either be formed explicitly, or they
     !! may be postmultiplied into input matrices U1, V1, or Q1.
               alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobq, jobu, jobv
           integer(${ik}$), intent(out) :: info, ncycle
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p
           real(${rk}$), intent(in) :: tola, tolb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*)
           real(${rk}$), intent(out) :: alpha(*), beta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 40_${ik}$
           real(${rk}$), parameter :: hugenum = huge(zero)
           
           
           ! Local Scalars 
           logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv
           integer(${ik}$) :: i, j, kcycle
           real(${rk}$) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, &
                     ssmin
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           initu = stdlib_lsame( jobu, 'I' )
           wantu = initu .or. stdlib_lsame( jobu, 'U' )
           initv = stdlib_lsame( jobv, 'I' )
           wantv = initv .or. stdlib_lsame( jobv, 'V' )
           initq = stdlib_lsame( jobq, 'I' )
           wantq = initq .or. stdlib_lsame( jobq, 'Q' )
           info = 0_${ik}$
           if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then
              info = -1_${ik}$
           else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -2_${ik}$
           else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( p<0_${ik}$ ) then
              info = -5_${ik}$
           else if( n<0_${ik}$ ) then
              info = -6_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -12_${ik}$
           else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then
              info = -18_${ik}$
           else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then
              info = -20_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -22_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSJA', -info )
              return
           end if
           ! initialize u, v and q, if necessary
           if( initu )call stdlib${ii}$_${ri}$laset( 'FULL', m, m, zero, one, u, ldu )
           if( initv )call stdlib${ii}$_${ri}$laset( 'FULL', p, p, zero, one, v, ldv )
           if( initq )call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, q, ldq )
           ! loop until convergence
           upper = .false.
           loop_40: do kcycle = 1, maxit
              upper = .not.upper
              loop_20: do i = 1, l - 1
                 loop_10: do j = i + 1, l
                    a1 = zero
                    a2 = zero
                    a3 = zero
                    if( k+i<=m )a1 = a( k+i, n-l+i )
                    if( k+j<=m )a3 = a( k+j, n-l+j )
                    b1 = b( i, n-l+i )
                    b3 = b( j, n-l+j )
                    if( upper ) then
                       if( k+i<=m )a2 = a( k+i, n-l+j )
                       b2 = b( i, n-l+j )
                    else
                       if( k+j<=m )a2 = a( k+j, n-l+i )
                       b2 = b( j, n-l+i )
                    end if
                    call stdlib${ii}$_${ri}$lags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, &
                              snq )
                    ! update (k+i)-th and (k+j)-th rows of matrix a: u**t *a
                    if( k+j<=m )call stdlib${ii}$_${ri}$rot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, &
                              csu, snu )
                    ! update i-th and j-th rows of matrix b: v**t *b
                    call stdlib${ii}$_${ri}$rot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, snv )
                    ! update (n-l+i)-th and (n-l+j)-th columns of matrices
                    ! a and b: a*q and b*q
                    call stdlib${ii}$_${ri}$rot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq )
                              
                    call stdlib${ii}$_${ri}$rot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                    if( upper ) then
                       if( k+i<=m )a( k+i, n-l+j ) = zero
                       b( i, n-l+j ) = zero
                    else
                       if( k+j<=m )a( k+j, n-l+i ) = zero
                       b( j, n-l+i ) = zero
                    end if
                    ! update orthogonal matrices u, v, q, if desired.
                    if( wantu .and. k+j<=m )call stdlib${ii}$_${ri}$rot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, &
                              csu,snu )
                    if( wantv )call stdlib${ii}$_${ri}$rot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv )
                    if( wantq )call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                              
                 end do loop_10
              end do loop_20
              if( .not.upper ) then
                 ! the matrices a13 and b13 were lower triangular at the start
                 ! of the cycle, and are now upper triangular.
                 ! convergence test: test the parallelism of the corresponding
                 ! rows of a and b.
                 error = zero
                 do i = 1, min( l, m-k )
                    call stdlib${ii}$_${ri}$copy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ )
                    call stdlib${ii}$_${ri}$copy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$lapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin )
                    error = max( error, ssmin )
                 end do
                 if( abs( error )<=min( tola, tolb ) )go to 50
              end if
              ! end of cycle loop
           end do loop_40
           ! the algorithm has not converged after maxit cycles.
           info = 1_${ik}$
           go to 100
           50 continue
           ! if error <= min(tola,tolb), then the algorithm has converged.
           ! compute the generalized singular value pairs (alpha, beta), and
           ! set the triangular matrix r to array a.
           do i = 1, k
              alpha( i ) = one
              beta( i ) = zero
           end do
           do i = 1, min( l, m-k )
              a1 = a( k+i, n-l+i )
              b1 = b( i, n-l+i )
              gamma = b1 / a1
              if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then
                 ! change sign if necessary
                 if( gamma<zero ) then
                    call stdlib${ii}$_${ri}$scal( l-i+1, -one, b( i, n-l+i ), ldb )
                    if( wantv )call stdlib${ii}$_${ri}$scal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 call stdlib${ii}$_${ri}$lartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk )
                 if( alpha( k+i )>=beta( k+i ) ) then
                    call stdlib${ii}$_${ri}$scal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda )
                 else
                    call stdlib${ii}$_${ri}$scal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb )
                    call stdlib${ii}$_${ri}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
                 end if
              else
                 alpha( k+i ) = zero
                 beta( k+i ) = one
                 call stdlib${ii}$_${ri}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
              end if
           end do
           ! post-assignment
           do i = m + 1, k + l
              alpha( i ) = zero
              beta( i ) = one
           end do
           if( k+l<n ) then
              do i = k + l + 1, n
                 alpha( i ) = zero
                 beta( i ) = zero
              end do
           end if
           100 continue
           ncycle = kcycle
           return
     end subroutine stdlib${ii}$_${ri}$tgsja

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, &
     !! CTGSJA computes the generalized singular value decomposition (GSVD)
     !! of two complex upper triangular (or trapezoidal) matrices A and B.
     !! On entry, it is assumed that matrices A and B have the following
     !! forms, which may be obtained by the preprocessing subroutine CGGSVP
     !! from a general M-by-N matrix A and P-by-N matrix B:
     !! N-K-L  K    L
     !! A =    K ( 0    A12  A13 ) if M-K-L >= 0;
     !! L ( 0     0   A23 )
     !! M-K-L ( 0     0    0  )
     !! N-K-L  K    L
     !! A =  K ( 0    A12  A13 ) if M-K-L < 0;
     !! M-K ( 0     0   A23 )
     !! N-K-L  K    L
     !! B =  L ( 0     0   B13 )
     !! P-L ( 0     0    0  )
     !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
     !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
     !! otherwise A23 is (M-K)-by-L upper trapezoidal.
     !! On exit,
     !! U**H *A*Q = D1*( 0 R ),    V**H *B*Q = D2*( 0 R ),
     !! where U, V and Q are unitary matrices.
     !! R is a nonsingular upper triangular matrix, and D1
     !! and D2 are ``diagonal'' matrices, which are of the following
     !! structures:
     !! If M-K-L >= 0,
     !! K  L
     !! D1 =     K ( I  0 )
     !! L ( 0  C )
     !! M-K-L ( 0  0 )
     !! K  L
     !! D2 = L   ( 0  S )
     !! P-L ( 0  0 )
     !! N-K-L  K    L
     !! ( 0 R ) = K (  0   R11  R12 ) K
     !! L (  0    0   R22 ) L
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
     !! S = diag( BETA(K+1),  ... , BETA(K+L) ),
     !! C**2 + S**2 = I.
     !! R is stored in A(1:K+L,N-K-L+1:N) on exit.
     !! If M-K-L < 0,
     !! K M-K K+L-M
     !! D1 =   K ( I  0    0   )
     !! M-K ( 0  C    0   )
     !! K M-K K+L-M
     !! D2 =   M-K ( 0  S    0   )
     !! K+L-M ( 0  0    I   )
     !! P-L ( 0  0    0   )
     !! N-K-L  K   M-K  K+L-M
     !! ( 0 R ) =    K ( 0    R11  R12  R13  )
     !! M-K ( 0     0   R22  R23  )
     !! K+L-M ( 0     0    0   R33  )
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(M) ),
     !! S = diag( BETA(K+1),  ... , BETA(M) ),
     !! C**2 + S**2 = I.
     !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
     !! (  0  R22 R23 )
     !! in B(M-K+1:L,N+M-K-L+1:N) on exit.
     !! The computation of the unitary transformation matrices U, V or Q
     !! is optional.  These matrices may either be formed explicitly, or they
     !! may be postmultiplied into input matrices U1, V1, or Q1.
               alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobq, jobu, jobv
           integer(${ik}$), intent(out) :: info, ncycle
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p
           real(sp), intent(in) :: tola, tolb
           ! Array Arguments 
           real(sp), intent(out) :: alpha(*), beta(*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 40_${ik}$
           real(sp), parameter :: hugenum = huge(zero)
           
           
           
           ! Local Scalars 
           logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv
           integer(${ik}$) :: i, j, kcycle
           real(sp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin
           complex(sp) :: a2, b2, snq, snu, snv
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           initu = stdlib_lsame( jobu, 'I' )
           wantu = initu .or. stdlib_lsame( jobu, 'U' )
           initv = stdlib_lsame( jobv, 'I' )
           wantv = initv .or. stdlib_lsame( jobv, 'V' )
           initq = stdlib_lsame( jobq, 'I' )
           wantq = initq .or. stdlib_lsame( jobq, 'Q' )
           info = 0_${ik}$
           if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then
              info = -1_${ik}$
           else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -2_${ik}$
           else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( p<0_${ik}$ ) then
              info = -5_${ik}$
           else if( n<0_${ik}$ ) then
              info = -6_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -12_${ik}$
           else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then
              info = -18_${ik}$
           else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then
              info = -20_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -22_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGSJA', -info )
              return
           end if
           ! initialize u, v and q, if necessary
           if( initu )call stdlib${ii}$_claset( 'FULL', m, m, czero, cone, u, ldu )
           if( initv )call stdlib${ii}$_claset( 'FULL', p, p, czero, cone, v, ldv )
           if( initq )call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, q, ldq )
           ! loop until convergence
           upper = .false.
           loop_40: do kcycle = 1, maxit
              upper = .not.upper
              loop_20: do i = 1, l - 1
                 loop_10: do j = i + 1, l
                    a1 = zero
                    a2 = czero
                    a3 = zero
                    if( k+i<=m )a1 = real( a( k+i, n-l+i ),KIND=sp)
                    if( k+j<=m )a3 = real( a( k+j, n-l+j ),KIND=sp)
                    b1 = real( b( i, n-l+i ),KIND=sp)
                    b3 = real( b( j, n-l+j ),KIND=sp)
                    if( upper ) then
                       if( k+i<=m )a2 = a( k+i, n-l+j )
                       b2 = b( i, n-l+j )
                    else
                       if( k+j<=m )a2 = a( k+j, n-l+i )
                       b2 = b( j, n-l+i )
                    end if
                    call stdlib${ii}$_clags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, &
                              snq )
                    ! update (k+i)-th and (k+j)-th rows of matrix a: u**h *a
                    if( k+j<=m )call stdlib${ii}$_crot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, &
                              csu, conjg( snu ) )
                    ! update i-th and j-th rows of matrix b: v**h *b
                    call stdlib${ii}$_crot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, conjg( snv ) &
                              )
                    ! update (n-l+i)-th and (n-l+j)-th columns of matrices
                    ! a and b: a*q and b*q
                    call stdlib${ii}$_crot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq )
                              
                    call stdlib${ii}$_crot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                    if( upper ) then
                       if( k+i<=m )a( k+i, n-l+j ) = czero
                       b( i, n-l+j ) = czero
                    else
                       if( k+j<=m )a( k+j, n-l+i ) = czero
                       b( j, n-l+i ) = czero
                    end if
                    ! ensure that the diagonal elements of a and b are real.
                    if( k+i<=m )a( k+i, n-l+i ) = real( a( k+i, n-l+i ),KIND=sp)
                    if( k+j<=m )a( k+j, n-l+j ) = real( a( k+j, n-l+j ),KIND=sp)
                    b( i, n-l+i ) = real( b( i, n-l+i ),KIND=sp)
                    b( j, n-l+j ) = real( b( j, n-l+j ),KIND=sp)
                    ! update unitary matrices u, v, q, if desired.
                    if( wantu .and. k+j<=m )call stdlib${ii}$_crot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, &
                              csu,snu )
                    if( wantv )call stdlib${ii}$_crot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv )
                    if( wantq )call stdlib${ii}$_crot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                              
                 end do loop_10
              end do loop_20
              if( .not.upper ) then
                 ! the matrices a13 and b13 were lower triangular at the start
                 ! of the cycle, and are now upper triangular.
                 ! convergence test: test the parallelism of the corresponding
                 ! rows of a and b.
                 error = zero
                 do i = 1, min( l, m-k )
                    call stdlib${ii}$_ccopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ )
                    call stdlib${ii}$_ccopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ )
                    call stdlib${ii}$_clapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin )
                    error = max( error, ssmin )
                 end do
                 if( abs( error )<=min( tola, tolb ) )go to 50
              end if
              ! end of cycle loop
           end do loop_40
           ! the algorithm has not converged after maxit cycles.
           info = 1_${ik}$
           go to 100
           50 continue
           ! if error <= min(tola,tolb), then the algorithm has converged.
           ! compute the generalized singular value pairs (alpha, beta), and
           ! set the triangular matrix r to array a.
           do i = 1, k
              alpha( i ) = one
              beta( i ) = zero
           end do
           do i = 1, min( l, m-k )
              a1 = real( a( k+i, n-l+i ),KIND=sp)
              b1 = real( b( i, n-l+i ),KIND=sp)
              gamma = b1 / a1
              if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then
                 if( gamma<zero ) then
                    call stdlib${ii}$_csscal( l-i+1, -one, b( i, n-l+i ), ldb )
                    if( wantv )call stdlib${ii}$_csscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 call stdlib${ii}$_slartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk )
                 if( alpha( k+i )>=beta( k+i ) ) then
                    call stdlib${ii}$_csscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda )
                 else
                    call stdlib${ii}$_csscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb )
                    call stdlib${ii}$_ccopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
                 end if
              else
                 alpha( k+i ) = zero
                 beta( k+i ) = one
                 call stdlib${ii}$_ccopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
              end if
           end do
           ! post-assignment
           do i = m + 1, k + l
              alpha( i ) = zero
              beta( i ) = one
           end do
           if( k+l<n ) then
              do i = k + l + 1, n
                 alpha( i ) = zero
                 beta( i ) = zero
              end do
           end if
           100 continue
           ncycle = kcycle
           return
     end subroutine stdlib${ii}$_ctgsja

     pure module subroutine stdlib${ii}$_ztgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, &
     !! ZTGSJA computes the generalized singular value decomposition (GSVD)
     !! of two complex upper triangular (or trapezoidal) matrices A and B.
     !! On entry, it is assumed that matrices A and B have the following
     !! forms, which may be obtained by the preprocessing subroutine ZGGSVP
     !! from a general M-by-N matrix A and P-by-N matrix B:
     !! N-K-L  K    L
     !! A =    K ( 0    A12  A13 ) if M-K-L >= 0;
     !! L ( 0     0   A23 )
     !! M-K-L ( 0     0    0  )
     !! N-K-L  K    L
     !! A =  K ( 0    A12  A13 ) if M-K-L < 0;
     !! M-K ( 0     0   A23 )
     !! N-K-L  K    L
     !! B =  L ( 0     0   B13 )
     !! P-L ( 0     0    0  )
     !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
     !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
     !! otherwise A23 is (M-K)-by-L upper trapezoidal.
     !! On exit,
     !! U**H *A*Q = D1*( 0 R ),    V**H *B*Q = D2*( 0 R ),
     !! where U, V and Q are unitary matrices.
     !! R is a nonsingular upper triangular matrix, and D1
     !! and D2 are ``diagonal'' matrices, which are of the following
     !! structures:
     !! If M-K-L >= 0,
     !! K  L
     !! D1 =     K ( I  0 )
     !! L ( 0  C )
     !! M-K-L ( 0  0 )
     !! K  L
     !! D2 = L   ( 0  S )
     !! P-L ( 0  0 )
     !! N-K-L  K    L
     !! ( 0 R ) = K (  0   R11  R12 ) K
     !! L (  0    0   R22 ) L
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
     !! S = diag( BETA(K+1),  ... , BETA(K+L) ),
     !! C**2 + S**2 = I.
     !! R is stored in A(1:K+L,N-K-L+1:N) on exit.
     !! If M-K-L < 0,
     !! K M-K K+L-M
     !! D1 =   K ( I  0    0   )
     !! M-K ( 0  C    0   )
     !! K M-K K+L-M
     !! D2 =   M-K ( 0  S    0   )
     !! K+L-M ( 0  0    I   )
     !! P-L ( 0  0    0   )
     !! N-K-L  K   M-K  K+L-M
     !! ( 0 R ) =    K ( 0    R11  R12  R13  )
     !! M-K ( 0     0   R22  R23  )
     !! K+L-M ( 0     0    0   R33  )
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(M) ),
     !! S = diag( BETA(K+1),  ... , BETA(M) ),
     !! C**2 + S**2 = I.
     !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
     !! (  0  R22 R23 )
     !! in B(M-K+1:L,N+M-K-L+1:N) on exit.
     !! The computation of the unitary transformation matrices U, V or Q
     !! is optional.  These matrices may either be formed explicitly, or they
     !! may be postmultiplied into input matrices U1, V1, or Q1.
               alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobq, jobu, jobv
           integer(${ik}$), intent(out) :: info, ncycle
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p
           real(dp), intent(in) :: tola, tolb
           ! Array Arguments 
           real(dp), intent(out) :: alpha(*), beta(*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 40_${ik}$
           real(dp), parameter :: hugenum = huge(zero)
           
           
           
           ! Local Scalars 
           logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv
           integer(${ik}$) :: i, j, kcycle
           real(dp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin
           complex(dp) :: a2, b2, snq, snu, snv
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           initu = stdlib_lsame( jobu, 'I' )
           wantu = initu .or. stdlib_lsame( jobu, 'U' )
           initv = stdlib_lsame( jobv, 'I' )
           wantv = initv .or. stdlib_lsame( jobv, 'V' )
           initq = stdlib_lsame( jobq, 'I' )
           wantq = initq .or. stdlib_lsame( jobq, 'Q' )
           info = 0_${ik}$
           if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then
              info = -1_${ik}$
           else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -2_${ik}$
           else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( p<0_${ik}$ ) then
              info = -5_${ik}$
           else if( n<0_${ik}$ ) then
              info = -6_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -12_${ik}$
           else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then
              info = -18_${ik}$
           else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then
              info = -20_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -22_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSJA', -info )
              return
           end if
           ! initialize u, v and q, if necessary
           if( initu )call stdlib${ii}$_zlaset( 'FULL', m, m, czero, cone, u, ldu )
           if( initv )call stdlib${ii}$_zlaset( 'FULL', p, p, czero, cone, v, ldv )
           if( initq )call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, q, ldq )
           ! loop until convergence
           upper = .false.
           loop_40: do kcycle = 1, maxit
              upper = .not.upper
              loop_20: do i = 1, l - 1
                 loop_10: do j = i + 1, l
                    a1 = zero
                    a2 = czero
                    a3 = zero
                    if( k+i<=m )a1 = real( a( k+i, n-l+i ),KIND=dp)
                    if( k+j<=m )a3 = real( a( k+j, n-l+j ),KIND=dp)
                    b1 = real( b( i, n-l+i ),KIND=dp)
                    b3 = real( b( j, n-l+j ),KIND=dp)
                    if( upper ) then
                       if( k+i<=m )a2 = a( k+i, n-l+j )
                       b2 = b( i, n-l+j )
                    else
                       if( k+j<=m )a2 = a( k+j, n-l+i )
                       b2 = b( j, n-l+i )
                    end if
                    call stdlib${ii}$_zlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, &
                              snq )
                    ! update (k+i)-th and (k+j)-th rows of matrix a: u**h *a
                    if( k+j<=m )call stdlib${ii}$_zrot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, &
                              csu, conjg( snu ) )
                    ! update i-th and j-th rows of matrix b: v**h *b
                    call stdlib${ii}$_zrot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, conjg( snv ) &
                              )
                    ! update (n-l+i)-th and (n-l+j)-th columns of matrices
                    ! a and b: a*q and b*q
                    call stdlib${ii}$_zrot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq )
                              
                    call stdlib${ii}$_zrot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                    if( upper ) then
                       if( k+i<=m )a( k+i, n-l+j ) = czero
                       b( i, n-l+j ) = czero
                    else
                       if( k+j<=m )a( k+j, n-l+i ) = czero
                       b( j, n-l+i ) = czero
                    end if
                    ! ensure that the diagonal elements of a and b are real.
                    if( k+i<=m )a( k+i, n-l+i ) = real( a( k+i, n-l+i ),KIND=dp)
                    if( k+j<=m )a( k+j, n-l+j ) = real( a( k+j, n-l+j ),KIND=dp)
                    b( i, n-l+i ) = real( b( i, n-l+i ),KIND=dp)
                    b( j, n-l+j ) = real( b( j, n-l+j ),KIND=dp)
                    ! update unitary matrices u, v, q, if desired.
                    if( wantu .and. k+j<=m )call stdlib${ii}$_zrot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, &
                              csu,snu )
                    if( wantv )call stdlib${ii}$_zrot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv )
                    if( wantq )call stdlib${ii}$_zrot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                              
                 end do loop_10
              end do loop_20
              if( .not.upper ) then
                 ! the matrices a13 and b13 were lower triangular at the start
                 ! of the cycle, and are now upper triangular.
                 ! convergence test: test the parallelism of the corresponding
                 ! rows of a and b.
                 error = zero
                 do i = 1, min( l, m-k )
                    call stdlib${ii}$_zcopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ )
                    call stdlib${ii}$_zcopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ )
                    call stdlib${ii}$_zlapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin )
                    error = max( error, ssmin )
                 end do
                 if( abs( error )<=min( tola, tolb ) )go to 50
              end if
              ! end of cycle loop
           end do loop_40
           ! the algorithm has not converged after maxit cycles.
           info = 1_${ik}$
           go to 100
           50 continue
           ! if error <= min(tola,tolb), then the algorithm has converged.
           ! compute the generalized singular value pairs (alpha, beta), and
           ! set the triangular matrix r to array a.
           do i = 1, k
              alpha( i ) = one
              beta( i ) = zero
           end do
           do i = 1, min( l, m-k )
              a1 = real( a( k+i, n-l+i ),KIND=dp)
              b1 = real( b( i, n-l+i ),KIND=dp)
              gamma = b1 / a1
              if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then
                 if( gamma<zero ) then
                    call stdlib${ii}$_zdscal( l-i+1, -one, b( i, n-l+i ), ldb )
                    if( wantv )call stdlib${ii}$_zdscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 call stdlib${ii}$_dlartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk )
                 if( alpha( k+i )>=beta( k+i ) ) then
                    call stdlib${ii}$_zdscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda )
                 else
                    call stdlib${ii}$_zdscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb )
                    call stdlib${ii}$_zcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
                 end if
              else
                 alpha( k+i ) = zero
                 beta( k+i ) = one
                 call stdlib${ii}$_zcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
              end if
           end do
           ! post-assignment
           do i = m + 1, k + l
              alpha( i ) = zero
              beta( i ) = one
           end do
           if( k+l<n ) then
              do i = k + l + 1, n
                 alpha( i ) = zero
                 beta( i ) = zero
              end do
           end if
           100 continue
           ncycle = kcycle
           return
     end subroutine stdlib${ii}$_ztgsja

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, &
     !! ZTGSJA: computes the generalized singular value decomposition (GSVD)
     !! of two complex upper triangular (or trapezoidal) matrices A and B.
     !! On entry, it is assumed that matrices A and B have the following
     !! forms, which may be obtained by the preprocessing subroutine ZGGSVP
     !! from a general M-by-N matrix A and P-by-N matrix B:
     !! N-K-L  K    L
     !! A =    K ( 0    A12  A13 ) if M-K-L >= 0;
     !! L ( 0     0   A23 )
     !! M-K-L ( 0     0    0  )
     !! N-K-L  K    L
     !! A =  K ( 0    A12  A13 ) if M-K-L < 0;
     !! M-K ( 0     0   A23 )
     !! N-K-L  K    L
     !! B =  L ( 0     0   B13 )
     !! P-L ( 0     0    0  )
     !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
     !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
     !! otherwise A23 is (M-K)-by-L upper trapezoidal.
     !! On exit,
     !! U**H *A*Q = D1*( 0 R ),    V**H *B*Q = D2*( 0 R ),
     !! where U, V and Q are unitary matrices.
     !! R is a nonsingular upper triangular matrix, and D1
     !! and D2 are ``diagonal'' matrices, which are of the following
     !! structures:
     !! If M-K-L >= 0,
     !! K  L
     !! D1 =     K ( I  0 )
     !! L ( 0  C )
     !! M-K-L ( 0  0 )
     !! K  L
     !! D2 = L   ( 0  S )
     !! P-L ( 0  0 )
     !! N-K-L  K    L
     !! ( 0 R ) = K (  0   R11  R12 ) K
     !! L (  0    0   R22 ) L
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
     !! S = diag( BETA(K+1),  ... , BETA(K+L) ),
     !! C**2 + S**2 = I.
     !! R is stored in A(1:K+L,N-K-L+1:N) on exit.
     !! If M-K-L < 0,
     !! K M-K K+L-M
     !! D1 =   K ( I  0    0   )
     !! M-K ( 0  C    0   )
     !! K M-K K+L-M
     !! D2 =   M-K ( 0  S    0   )
     !! K+L-M ( 0  0    I   )
     !! P-L ( 0  0    0   )
     !! N-K-L  K   M-K  K+L-M
     !! ( 0 R ) =    K ( 0    R11  R12  R13  )
     !! M-K ( 0     0   R22  R23  )
     !! K+L-M ( 0     0    0   R33  )
     !! where
     !! C = diag( ALPHA(K+1), ... , ALPHA(M) ),
     !! S = diag( BETA(K+1),  ... , BETA(M) ),
     !! C**2 + S**2 = I.
     !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
     !! (  0  R22 R23 )
     !! in B(M-K+1:L,N+M-K-L+1:N) on exit.
     !! The computation of the unitary transformation matrices U, V or Q
     !! is optional.  These matrices may either be formed explicitly, or they
     !! may be postmultiplied into input matrices U1, V1, or Q1.
               alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobq, jobu, jobv
           integer(${ik}$), intent(out) :: info, ncycle
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p
           real(${ck}$), intent(in) :: tola, tolb
           ! Array Arguments 
           real(${ck}$), intent(out) :: alpha(*), beta(*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 40_${ik}$
           real(${ck}$), parameter :: hugenum = huge(zero)
           
           
           
           ! Local Scalars 
           logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv
           integer(${ik}$) :: i, j, kcycle
           real(${ck}$) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin
           complex(${ck}$) :: a2, b2, snq, snu, snv
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           initu = stdlib_lsame( jobu, 'I' )
           wantu = initu .or. stdlib_lsame( jobu, 'U' )
           initv = stdlib_lsame( jobv, 'I' )
           wantv = initv .or. stdlib_lsame( jobv, 'V' )
           initq = stdlib_lsame( jobq, 'I' )
           wantq = initq .or. stdlib_lsame( jobq, 'Q' )
           info = 0_${ik}$
           if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then
              info = -1_${ik}$
           else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then
              info = -2_${ik}$
           else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( p<0_${ik}$ ) then
              info = -5_${ik}$
           else if( n<0_${ik}$ ) then
              info = -6_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -12_${ik}$
           else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then
              info = -18_${ik}$
           else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then
              info = -20_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -22_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSJA', -info )
              return
           end if
           ! initialize u, v and q, if necessary
           if( initu )call stdlib${ii}$_${ci}$laset( 'FULL', m, m, czero, cone, u, ldu )
           if( initv )call stdlib${ii}$_${ci}$laset( 'FULL', p, p, czero, cone, v, ldv )
           if( initq )call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, q, ldq )
           ! loop until convergence
           upper = .false.
           loop_40: do kcycle = 1, maxit
              upper = .not.upper
              loop_20: do i = 1, l - 1
                 loop_10: do j = i + 1, l
                    a1 = zero
                    a2 = czero
                    a3 = zero
                    if( k+i<=m )a1 = real( a( k+i, n-l+i ),KIND=${ck}$)
                    if( k+j<=m )a3 = real( a( k+j, n-l+j ),KIND=${ck}$)
                    b1 = real( b( i, n-l+i ),KIND=${ck}$)
                    b3 = real( b( j, n-l+j ),KIND=${ck}$)
                    if( upper ) then
                       if( k+i<=m )a2 = a( k+i, n-l+j )
                       b2 = b( i, n-l+j )
                    else
                       if( k+j<=m )a2 = a( k+j, n-l+i )
                       b2 = b( j, n-l+i )
                    end if
                    call stdlib${ii}$_${ci}$lags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, &
                              snq )
                    ! update (k+i)-th and (k+j)-th rows of matrix a: u**h *a
                    if( k+j<=m )call stdlib${ii}$_${ci}$rot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, &
                              csu, conjg( snu ) )
                    ! update i-th and j-th rows of matrix b: v**h *b
                    call stdlib${ii}$_${ci}$rot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, conjg( snv ) &
                              )
                    ! update (n-l+i)-th and (n-l+j)-th columns of matrices
                    ! a and b: a*q and b*q
                    call stdlib${ii}$_${ci}$rot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq )
                              
                    call stdlib${ii}$_${ci}$rot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                    if( upper ) then
                       if( k+i<=m )a( k+i, n-l+j ) = czero
                       b( i, n-l+j ) = czero
                    else
                       if( k+j<=m )a( k+j, n-l+i ) = czero
                       b( j, n-l+i ) = czero
                    end if
                    ! ensure that the diagonal elements of a and b are real.
                    if( k+i<=m )a( k+i, n-l+i ) = real( a( k+i, n-l+i ),KIND=${ck}$)
                    if( k+j<=m )a( k+j, n-l+j ) = real( a( k+j, n-l+j ),KIND=${ck}$)
                    b( i, n-l+i ) = real( b( i, n-l+i ),KIND=${ck}$)
                    b( j, n-l+j ) = real( b( j, n-l+j ),KIND=${ck}$)
                    ! update unitary matrices u, v, q, if desired.
                    if( wantu .and. k+j<=m )call stdlib${ii}$_${ci}$rot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, &
                              csu,snu )
                    if( wantv )call stdlib${ii}$_${ci}$rot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv )
                    if( wantq )call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq )
                              
                 end do loop_10
              end do loop_20
              if( .not.upper ) then
                 ! the matrices a13 and b13 were lower triangular at the start
                 ! of the cycle, and are now upper triangular.
                 ! convergence test: test the parallelism of the corresponding
                 ! rows of a and b.
                 error = zero
                 do i = 1, min( l, m-k )
                    call stdlib${ii}$_${ci}$copy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ )
                    call stdlib${ii}$_${ci}$copy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ci}$lapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin )
                    error = max( error, ssmin )
                 end do
                 if( abs( error )<=min( tola, tolb ) )go to 50
              end if
              ! end of cycle loop
           end do loop_40
           ! the algorithm has not converged after maxit cycles.
           info = 1_${ik}$
           go to 100
           50 continue
           ! if error <= min(tola,tolb), then the algorithm has converged.
           ! compute the generalized singular value pairs (alpha, beta), and
           ! set the triangular matrix r to array a.
           do i = 1, k
              alpha( i ) = one
              beta( i ) = zero
           end do
           do i = 1, min( l, m-k )
              a1 = real( a( k+i, n-l+i ),KIND=${ck}$)
              b1 = real( b( i, n-l+i ),KIND=${ck}$)
              gamma = b1 / a1
              if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then
                 if( gamma<zero ) then
                    call stdlib${ii}$_${ci}$dscal( l-i+1, -one, b( i, n-l+i ), ldb )
                    if( wantv )call stdlib${ii}$_${ci}$dscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ )
                 end if
                 call stdlib${ii}$_${c2ri(ci)}$lartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk )
                 if( alpha( k+i )>=beta( k+i ) ) then
                    call stdlib${ii}$_${ci}$dscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda )
                 else
                    call stdlib${ii}$_${ci}$dscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb )
                    call stdlib${ii}$_${ci}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
                 end if
              else
                 alpha( k+i ) = zero
                 beta( k+i ) = one
                 call stdlib${ii}$_${ci}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda )
              end if
           end do
           ! post-assignment
           do i = m + 1, k + l
              alpha( i ) = zero
              beta( i ) = one
           end do
           if( k+l<n ) then
              do i = k + l + 1, n
                 alpha( i ) = zero
                 beta( i ) = zero
              end do
           end if
           100 continue
           ncycle = kcycle
           return
     end subroutine stdlib${ii}$_${ci}$tgsja

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungbr( vect, m, n, k, a, lda, tau, work, lwork, info )
     !! CUNGBR generates one of the complex unitary matrices Q or P**H
     !! determined by CGEBRD when reducing a complex matrix A to bidiagonal
     !! form: A = Q * B * P**H.  Q and P**H are defined as products of
     !! elementary reflectors H(i) or G(i) respectively.
     !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
     !! is of order M:
     !! if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n
     !! columns of Q, where m >= n >= k;
     !! if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an
     !! M-by-M matrix.
     !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
     !! is of order N:
     !! if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m
     !! rows of P**H, where n >= m >= k;
     !! if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as
     !! an N-by-N matrix.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wantq
           integer(${ik}$) :: i, iinfo, j, lwkopt, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           wantq = stdlib_lsame( vect, 'Q' )
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( &
                     m>n .or. m<min( n, k ) ) ) ) then
              info = -3_${ik}$
           else if( k<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then
              info = -9_${ik}$
           end if
           if( info==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              if( wantq ) then
                 if( m>=k ) then
                    call stdlib${ii}$_cungqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( m>1_${ik}$ ) then
                       call stdlib${ii}$_cungqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              else
                 if( k<n ) then
                    call stdlib${ii}$_cunglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( n>1_${ik}$ ) then
                       call stdlib${ii}$_cunglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              end if
              lwkopt = real( work( 1_${ik}$ ),KIND=sp)
              lwkopt = max (lwkopt, mn)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGBR', -info )
              return
           else if( lquery ) then
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           if( wantq ) then
              ! form q, determined by a call to stdlib${ii}$_cgebrd to reduce an m-by-k
              ! matrix
              if( m>=k ) then
                 ! if m >= k, assume m >= n >= k
                 call stdlib${ii}$_cungqr( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if m < k, assume m = n
                 ! shift the vectors which define the elementary reflectors cone
                 ! column to the right, and set the first row and column of q
                 ! to those of the unit matrix
                 do j = m, 2, -1
                    a( 1_${ik}$, j ) = czero
                    do i = j + 1, m
                       a( i, j ) = a( i, j-1 )
                    end do
                 end do
                 a( 1_${ik}$, 1_${ik}$ ) = cone
                 do i = 2, m
                    a( i, 1_${ik}$ ) = czero
                 end do
                 if( m>1_${ik}$ ) then
                    ! form q(2:m,2:m)
                    call stdlib${ii}$_cungqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           else
              ! form p**h, determined by a call to stdlib${ii}$_cgebrd to reduce a k-by-n
              ! matrix
              if( k<n ) then
                 ! if k < n, assume k <= m <= n
                 call stdlib${ii}$_cunglq( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if k >= n, assume m = n
                 ! shift the vectors which define the elementary reflectors cone
                 ! row downward, and set the first row and column of p**h to
                 ! those of the unit matrix
                 a( 1_${ik}$, 1_${ik}$ ) = cone
                 do i = 2, n
                    a( i, 1_${ik}$ ) = czero
                 end do
                 do j = 2, n
                    do i = j - 1, 2, -1
                       a( i, j ) = a( i-1, j )
                    end do
                    a( 1_${ik}$, j ) = czero
                 end do
                 if( n>1_${ik}$ ) then
                    ! form p**h(2:n,2:n)
                    call stdlib${ii}$_cunglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_cungbr

     pure module subroutine stdlib${ii}$_zungbr( vect, m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGBR generates one of the complex unitary matrices Q or P**H
     !! determined by ZGEBRD when reducing a complex matrix A to bidiagonal
     !! form: A = Q * B * P**H.  Q and P**H are defined as products of
     !! elementary reflectors H(i) or G(i) respectively.
     !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
     !! is of order M:
     !! if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n
     !! columns of Q, where m >= n >= k;
     !! if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an
     !! M-by-M matrix.
     !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
     !! is of order N:
     !! if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m
     !! rows of P**H, where n >= m >= k;
     !! if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as
     !! an N-by-N matrix.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wantq
           integer(${ik}$) :: i, iinfo, j, lwkopt, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           wantq = stdlib_lsame( vect, 'Q' )
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( &
                     m>n .or. m<min( n, k ) ) ) ) then
              info = -3_${ik}$
           else if( k<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then
              info = -9_${ik}$
           end if
           if( info==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              if( wantq ) then
                 if( m>=k ) then
                    call stdlib${ii}$_zungqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( m>1_${ik}$ ) then
                       call stdlib${ii}$_zungqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              else
                 if( k<n ) then
                    call stdlib${ii}$_zunglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( n>1_${ik}$ ) then
                       call stdlib${ii}$_zunglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              end if
              lwkopt = real( work( 1_${ik}$ ),KIND=dp)
              lwkopt = max (lwkopt, mn)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGBR', -info )
              return
           else if( lquery ) then
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           if( wantq ) then
              ! form q, determined by a call to stdlib${ii}$_zgebrd to reduce an m-by-k
              ! matrix
              if( m>=k ) then
                 ! if m >= k, assume m >= n >= k
                 call stdlib${ii}$_zungqr( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if m < k, assume m = n
                 ! shift the vectors which define the elementary reflectors cone
                 ! column to the right, and set the first row and column of q
                 ! to those of the unit matrix
                 do j = m, 2, -1
                    a( 1_${ik}$, j ) = czero
                    do i = j + 1, m
                       a( i, j ) = a( i, j-1 )
                    end do
                 end do
                 a( 1_${ik}$, 1_${ik}$ ) = cone
                 do i = 2, m
                    a( i, 1_${ik}$ ) = czero
                 end do
                 if( m>1_${ik}$ ) then
                    ! form q(2:m,2:m)
                    call stdlib${ii}$_zungqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           else
              ! form p**h, determined by a call to stdlib${ii}$_zgebrd to reduce a k-by-n
              ! matrix
              if( k<n ) then
                 ! if k < n, assume k <= m <= n
                 call stdlib${ii}$_zunglq( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if k >= n, assume m = n
                 ! shift the vectors which define the elementary reflectors cone
                 ! row downward, and set the first row and column of p**h to
                 ! those of the unit matrix
                 a( 1_${ik}$, 1_${ik}$ ) = cone
                 do i = 2, n
                    a( i, 1_${ik}$ ) = czero
                 end do
                 do j = 2, n
                    do i = j - 1, 2, -1
                       a( i, j ) = a( i-1, j )
                    end do
                    a( 1_${ik}$, j ) = czero
                 end do
                 if( n>1_${ik}$ ) then
                    ! form p**h(2:n,2:n)
                    call stdlib${ii}$_zunglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_zungbr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungbr( vect, m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGBR: generates one of the complex unitary matrices Q or P**H
     !! determined by ZGEBRD when reducing a complex matrix A to bidiagonal
     !! form: A = Q * B * P**H.  Q and P**H are defined as products of
     !! elementary reflectors H(i) or G(i) respectively.
     !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
     !! is of order M:
     !! if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n
     !! columns of Q, where m >= n >= k;
     !! if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an
     !! M-by-M matrix.
     !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
     !! is of order N:
     !! if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m
     !! rows of P**H, where n >= m >= k;
     !! if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as
     !! an N-by-N matrix.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wantq
           integer(${ik}$) :: i, iinfo, j, lwkopt, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           wantq = stdlib_lsame( vect, 'Q' )
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( &
                     m>n .or. m<min( n, k ) ) ) ) then
              info = -3_${ik}$
           else if( k<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then
              info = -9_${ik}$
           end if
           if( info==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              if( wantq ) then
                 if( m>=k ) then
                    call stdlib${ii}$_${ci}$ungqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( m>1_${ik}$ ) then
                       call stdlib${ii}$_${ci}$ungqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              else
                 if( k<n ) then
                    call stdlib${ii}$_${ci}$unglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( n>1_${ik}$ ) then
                       call stdlib${ii}$_${ci}$unglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              end if
              lwkopt = real( work( 1_${ik}$ ),KIND=${ck}$)
              lwkopt = max (lwkopt, mn)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGBR', -info )
              return
           else if( lquery ) then
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           if( wantq ) then
              ! form q, determined by a call to stdlib${ii}$_${ci}$gebrd to reduce an m-by-k
              ! matrix
              if( m>=k ) then
                 ! if m >= k, assume m >= n >= k
                 call stdlib${ii}$_${ci}$ungqr( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if m < k, assume m = n
                 ! shift the vectors which define the elementary reflectors cone
                 ! column to the right, and set the first row and column of q
                 ! to those of the unit matrix
                 do j = m, 2, -1
                    a( 1_${ik}$, j ) = czero
                    do i = j + 1, m
                       a( i, j ) = a( i, j-1 )
                    end do
                 end do
                 a( 1_${ik}$, 1_${ik}$ ) = cone
                 do i = 2, m
                    a( i, 1_${ik}$ ) = czero
                 end do
                 if( m>1_${ik}$ ) then
                    ! form q(2:m,2:m)
                    call stdlib${ii}$_${ci}$ungqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           else
              ! form p**h, determined by a call to stdlib${ii}$_${ci}$gebrd to reduce a k-by-n
              ! matrix
              if( k<n ) then
                 ! if k < n, assume k <= m <= n
                 call stdlib${ii}$_${ci}$unglq( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if k >= n, assume m = n
                 ! shift the vectors which define the elementary reflectors cone
                 ! row downward, and set the first row and column of p**h to
                 ! those of the unit matrix
                 a( 1_${ik}$, 1_${ik}$ ) = cone
                 do i = 2, n
                    a( i, 1_${ik}$ ) = czero
                 end do
                 do j = 2, n
                    do i = j - 1, 2, -1
                       a( i, j ) = a( i-1, j )
                    end do
                    a( 1_${ik}$, j ) = czero
                 end do
                 if( n>1_${ik}$ ) then
                    ! form p**h(2:n,2:n)
                    call stdlib${ii}$_${ci}$unglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ci}$ungbr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgbr( vect, m, n, k, a, lda, tau, work, lwork, info )
     !! SORGBR generates one of the real orthogonal matrices Q or P**T
     !! determined by SGEBRD when reducing a real matrix A to bidiagonal
     !! form: A = Q * B * P**T.  Q and P**T are defined as products of
     !! elementary reflectors H(i) or G(i) respectively.
     !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
     !! is of order M:
     !! if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n
     !! columns of Q, where m >= n >= k;
     !! if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an
     !! M-by-M matrix.
     !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
     !! is of order N:
     !! if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m
     !! rows of P**T, where n >= m >= k;
     !! if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as
     !! an N-by-N matrix.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wantq
           integer(${ik}$) :: i, iinfo, j, lwkopt, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           wantq = stdlib_lsame( vect, 'Q' )
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( &
                     m>n .or. m<min( n, k ) ) ) ) then
              info = -3_${ik}$
           else if( k<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then
              info = -9_${ik}$
           end if
           if( info==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              if( wantq ) then
                 if( m>=k ) then
                    call stdlib${ii}$_sorgqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( m>1_${ik}$ ) then
                       call stdlib${ii}$_sorgqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              else
                 if( k<n ) then
                    call stdlib${ii}$_sorglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( n>1_${ik}$ ) then
                       call stdlib${ii}$_sorglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              end if
              lwkopt = work( 1_${ik}$ )
              lwkopt = max (lwkopt, mn)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGBR', -info )
              return
           else if( lquery ) then
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           if( wantq ) then
              ! form q, determined by a call to stdlib${ii}$_sgebrd to reduce an m-by-k
              ! matrix
              if( m>=k ) then
                 ! if m >= k, assume m >= n >= k
                 call stdlib${ii}$_sorgqr( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if m < k, assume m = n
                 ! shift the vectors which define the elementary reflectors one
                 ! column to the right, and set the first row and column of q
                 ! to those of the unit matrix
                 do j = m, 2, -1
                    a( 1_${ik}$, j ) = zero
                    do i = j + 1, m
                       a( i, j ) = a( i, j-1 )
                    end do
                 end do
                 a( 1_${ik}$, 1_${ik}$ ) = one
                 do i = 2, m
                    a( i, 1_${ik}$ ) = zero
                 end do
                 if( m>1_${ik}$ ) then
                    ! form q(2:m,2:m)
                    call stdlib${ii}$_sorgqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           else
              ! form p**t, determined by a call to stdlib${ii}$_sgebrd to reduce a k-by-n
              ! matrix
              if( k<n ) then
                 ! if k < n, assume k <= m <= n
                 call stdlib${ii}$_sorglq( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if k >= n, assume m = n
                 ! shift the vectors which define the elementary reflectors one
                 ! row downward, and set the first row and column of p**t to
                 ! those of the unit matrix
                 a( 1_${ik}$, 1_${ik}$ ) = one
                 do i = 2, n
                    a( i, 1_${ik}$ ) = zero
                 end do
                 do j = 2, n
                    do i = j - 1, 2, -1
                       a( i, j ) = a( i-1, j )
                    end do
                    a( 1_${ik}$, j ) = zero
                 end do
                 if( n>1_${ik}$ ) then
                    ! form p**t(2:n,2:n)
                    call stdlib${ii}$_sorglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_sorgbr

     pure module subroutine stdlib${ii}$_dorgbr( vect, m, n, k, a, lda, tau, work, lwork, info )
     !! DORGBR generates one of the real orthogonal matrices Q or P**T
     !! determined by DGEBRD when reducing a real matrix A to bidiagonal
     !! form: A = Q * B * P**T.  Q and P**T are defined as products of
     !! elementary reflectors H(i) or G(i) respectively.
     !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
     !! is of order M:
     !! if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n
     !! columns of Q, where m >= n >= k;
     !! if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an
     !! M-by-M matrix.
     !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
     !! is of order N:
     !! if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m
     !! rows of P**T, where n >= m >= k;
     !! if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as
     !! an N-by-N matrix.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wantq
           integer(${ik}$) :: i, iinfo, j, lwkopt, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           wantq = stdlib_lsame( vect, 'Q' )
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( &
                     m>n .or. m<min( n, k ) ) ) ) then
              info = -3_${ik}$
           else if( k<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then
              info = -9_${ik}$
           end if
           if( info==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              if( wantq ) then
                 if( m>=k ) then
                    call stdlib${ii}$_dorgqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( m>1_${ik}$ ) then
                       call stdlib${ii}$_dorgqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              else
                 if( k<n ) then
                    call stdlib${ii}$_dorglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( n>1_${ik}$ ) then
                       call stdlib${ii}$_dorglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              end if
              lwkopt = work( 1_${ik}$ )
              lwkopt = max (lwkopt, mn)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGBR', -info )
              return
           else if( lquery ) then
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           if( wantq ) then
              ! form q, determined by a call to stdlib${ii}$_dgebrd to reduce an m-by-k
              ! matrix
              if( m>=k ) then
                 ! if m >= k, assume m >= n >= k
                 call stdlib${ii}$_dorgqr( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if m < k, assume m = n
                 ! shift the vectors which define the elementary reflectors one
                 ! column to the right, and set the first row and column of q
                 ! to those of the unit matrix
                 do j = m, 2, -1
                    a( 1_${ik}$, j ) = zero
                    do i = j + 1, m
                       a( i, j ) = a( i, j-1 )
                    end do
                 end do
                 a( 1_${ik}$, 1_${ik}$ ) = one
                 do i = 2, m
                    a( i, 1_${ik}$ ) = zero
                 end do
                 if( m>1_${ik}$ ) then
                    ! form q(2:m,2:m)
                    call stdlib${ii}$_dorgqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           else
              ! form p**t, determined by a call to stdlib${ii}$_dgebrd to reduce a k-by-n
              ! matrix
              if( k<n ) then
                 ! if k < n, assume k <= m <= n
                 call stdlib${ii}$_dorglq( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if k >= n, assume m = n
                 ! shift the vectors which define the elementary reflectors one
                 ! row downward, and set the first row and column of p**t to
                 ! those of the unit matrix
                 a( 1_${ik}$, 1_${ik}$ ) = one
                 do i = 2, n
                    a( i, 1_${ik}$ ) = zero
                 end do
                 do j = 2, n
                    do i = j - 1, 2, -1
                       a( i, j ) = a( i-1, j )
                    end do
                    a( 1_${ik}$, j ) = zero
                 end do
                 if( n>1_${ik}$ ) then
                    ! form p**t(2:n,2:n)
                    call stdlib${ii}$_dorglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_dorgbr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgbr( vect, m, n, k, a, lda, tau, work, lwork, info )
     !! DORGBR: generates one of the real orthogonal matrices Q or P**T
     !! determined by DGEBRD when reducing a real matrix A to bidiagonal
     !! form: A = Q * B * P**T.  Q and P**T are defined as products of
     !! elementary reflectors H(i) or G(i) respectively.
     !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
     !! is of order M:
     !! if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n
     !! columns of Q, where m >= n >= k;
     !! if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an
     !! M-by-M matrix.
     !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
     !! is of order N:
     !! if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m
     !! rows of P**T, where n >= m >= k;
     !! if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as
     !! an N-by-N matrix.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wantq
           integer(${ik}$) :: i, iinfo, j, lwkopt, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           wantq = stdlib_lsame( vect, 'Q' )
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( &
                     m>n .or. m<min( n, k ) ) ) ) then
              info = -3_${ik}$
           else if( k<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then
              info = -9_${ik}$
           end if
           if( info==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              if( wantq ) then
                 if( m>=k ) then
                    call stdlib${ii}$_${ri}$orgqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( m>1_${ik}$ ) then
                       call stdlib${ii}$_${ri}$orgqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              else
                 if( k<n ) then
                    call stdlib${ii}$_${ri}$orglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo )
                 else
                    if( n>1_${ik}$ ) then
                       call stdlib${ii}$_${ri}$orglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo )
                    end if
                 end if
              end if
              lwkopt = work( 1_${ik}$ )
              lwkopt = max (lwkopt, mn)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGBR', -info )
              return
           else if( lquery ) then
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           if( wantq ) then
              ! form q, determined by a call to stdlib${ii}$_${ri}$gebrd to reduce an m-by-k
              ! matrix
              if( m>=k ) then
                 ! if m >= k, assume m >= n >= k
                 call stdlib${ii}$_${ri}$orgqr( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if m < k, assume m = n
                 ! shift the vectors which define the elementary reflectors one
                 ! column to the right, and set the first row and column of q
                 ! to those of the unit matrix
                 do j = m, 2, -1
                    a( 1_${ik}$, j ) = zero
                    do i = j + 1, m
                       a( i, j ) = a( i, j-1 )
                    end do
                 end do
                 a( 1_${ik}$, 1_${ik}$ ) = one
                 do i = 2, m
                    a( i, 1_${ik}$ ) = zero
                 end do
                 if( m>1_${ik}$ ) then
                    ! form q(2:m,2:m)
                    call stdlib${ii}$_${ri}$orgqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           else
              ! form p**t, determined by a call to stdlib${ii}$_${ri}$gebrd to reduce a k-by-n
              ! matrix
              if( k<n ) then
                 ! if k < n, assume k <= m <= n
                 call stdlib${ii}$_${ri}$orglq( m, n, k, a, lda, tau, work, lwork, iinfo )
              else
                 ! if k >= n, assume m = n
                 ! shift the vectors which define the elementary reflectors one
                 ! row downward, and set the first row and column of p**t to
                 ! those of the unit matrix
                 a( 1_${ik}$, 1_${ik}$ ) = one
                 do i = 2, n
                    a( i, 1_${ik}$ ) = zero
                 end do
                 do j = 2, n
                    do i = j - 1, 2, -1
                       a( i, j ) = a( i-1, j )
                    end do
                    a( 1_${ik}$, j ) = zero
                 end do
                 if( n>1_${ik}$ ) then
                    ! form p**t(2:n,2:n)
                    call stdlib${ii}$_${ri}$orglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo )
                              
                 end if
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ri}$orgbr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, &
     !! If VECT = 'Q', CUNMBR: overwrites the general complex M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      P * C          C * P
     !! TRANS = 'C':      P**H * C       C * P**H
     !! Here Q and P**H are the unitary matrices determined by CGEBRD when
     !! reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
     !! and P**H are defined as products of elementary reflectors H(i) and
     !! G(i) respectively.
     !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
     !! order of the unitary matrix Q or P**H that is applied.
     !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
     !! if nq >= k, Q = H(1) H(2) . . . H(k);
     !! if nq < k, Q = H(1) H(2) . . . H(nq-1).
     !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
     !! if k < nq, P = G(1) G(2) . . . G(k);
     !! if k >= nq, P = G(1) G(2) . . . G(nq-1).
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: applyq, left, lquery, notran
           character :: transt
           integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           applyq = stdlib_lsame( vect, 'Q' )
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q or p and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -2_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( k<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,&
                      k ) ) ) )then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -11_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m>0_${ik}$ .and. n>0_${ik}$ ) then
                 if( applyq ) then
                    if( left ) then
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', side // trans, m-1, n, m-1,-1_${ik}$ )
                    else
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', side // trans, m, n-1, n-1,-1_${ik}$ )
                    end if
                 else
                    if( left ) then
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', side // trans, m-1, n, m-1,-1_${ik}$ )
                    else
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', side // trans, m, n-1, n-1,-1_${ik}$ )
                    end if
                 end if
                 lwkopt = nw*nb
              else
                 lwkopt = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNMBR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 )return
           if( applyq ) then
              ! apply q
              if( nq>=k ) then
                 ! q was determined by a call to stdlib${ii}$_cgebrd with nq >= k
                 call stdlib${ii}$_cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo &
                           )
              else if( nq>1_${ik}$ ) then
                 ! q was determined by a call to stdlib${ii}$_cgebrd with nq < k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_cunmqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           else
              ! apply p
              if( notran ) then
                 transt = 'C'
              else
                 transt = 'N'
              end if
              if( nq>k ) then
                 ! p was determined by a call to stdlib${ii}$_cgebrd with nq > k
                 call stdlib${ii}$_cunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, &
                           iinfo )
              else if( nq>1_${ik}$ ) then
                 ! p was determined by a call to stdlib${ii}$_cgebrd with nq <= k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_cunmlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_cunmbr

     pure module subroutine stdlib${ii}$_zunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, &
     !! If VECT = 'Q', ZUNMBR: overwrites the general complex M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      P * C          C * P
     !! TRANS = 'C':      P**H * C       C * P**H
     !! Here Q and P**H are the unitary matrices determined by ZGEBRD when
     !! reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
     !! and P**H are defined as products of elementary reflectors H(i) and
     !! G(i) respectively.
     !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
     !! order of the unitary matrix Q or P**H that is applied.
     !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
     !! if nq >= k, Q = H(1) H(2) . . . H(k);
     !! if nq < k, Q = H(1) H(2) . . . H(nq-1).
     !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
     !! if k < nq, P = G(1) G(2) . . . G(k);
     !! if k >= nq, P = G(1) G(2) . . . G(nq-1).
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: applyq, left, lquery, notran
           character :: transt
           integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           applyq = stdlib_lsame( vect, 'Q' )
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q or p and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -2_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( k<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,&
                      k ) ) ) )then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -11_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m>0_${ik}$ .and. n>0_${ik}$ ) then
                 if( applyq ) then
                    if( left ) then
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m-1, n, m-1,-1_${ik}$ )
                    else
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n-1, n-1,-1_${ik}$ )
                    end if
                 else
                    if( left ) then
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m-1, n, m-1,-1_${ik}$ )
                    else
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m, n-1, n-1,-1_${ik}$ )
                    end if
                 end if
                 lwkopt = nw*nb
              else
                 lwkopt = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMBR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 )return
           if( applyq ) then
              ! apply q
              if( nq>=k ) then
                 ! q was determined by a call to stdlib${ii}$_zgebrd with nq >= k
                 call stdlib${ii}$_zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo &
                           )
              else if( nq>1_${ik}$ ) then
                 ! q was determined by a call to stdlib${ii}$_zgebrd with nq < k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_zunmqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           else
              ! apply p
              if( notran ) then
                 transt = 'C'
              else
                 transt = 'N'
              end if
              if( nq>k ) then
                 ! p was determined by a call to stdlib${ii}$_zgebrd with nq > k
                 call stdlib${ii}$_zunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, &
                           iinfo )
              else if( nq>1_${ik}$ ) then
                 ! p was determined by a call to stdlib${ii}$_zgebrd with nq <= k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_zunmlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_zunmbr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, &
     !! If VECT = 'Q', ZUNMBR: overwrites the general complex M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      P * C          C * P
     !! TRANS = 'C':      P**H * C       C * P**H
     !! Here Q and P**H are the unitary matrices determined by ZGEBRD when
     !! reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
     !! and P**H are defined as products of elementary reflectors H(i) and
     !! G(i) respectively.
     !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
     !! order of the unitary matrix Q or P**H that is applied.
     !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
     !! if nq >= k, Q = H(1) H(2) . . . H(k);
     !! if nq < k, Q = H(1) H(2) . . . H(nq-1).
     !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
     !! if k < nq, P = G(1) G(2) . . . G(k);
     !! if k >= nq, P = G(1) G(2) . . . G(nq-1).
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: applyq, left, lquery, notran
           character :: transt
           integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           applyq = stdlib_lsame( vect, 'Q' )
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q or p and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -2_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( k<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,&
                      k ) ) ) )then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -11_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m>0_${ik}$ .and. n>0_${ik}$ ) then
                 if( applyq ) then
                    if( left ) then
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m-1, n, m-1,-1_${ik}$ )
                    else
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n-1, n-1,-1_${ik}$ )
                    end if
                 else
                    if( left ) then
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m-1, n, m-1,-1_${ik}$ )
                    else
                       nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m, n-1, n-1,-1_${ik}$ )
                    end if
                 end if
                 lwkopt = nw*nb
              else
                 lwkopt = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMBR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 )return
           if( applyq ) then
              ! apply q
              if( nq>=k ) then
                 ! q was determined by a call to stdlib${ii}$_${ci}$gebrd with nq >= k
                 call stdlib${ii}$_${ci}$unmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo &
                           )
              else if( nq>1_${ik}$ ) then
                 ! q was determined by a call to stdlib${ii}$_${ci}$gebrd with nq < k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ci}$unmqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           else
              ! apply p
              if( notran ) then
                 transt = 'C'
              else
                 transt = 'N'
              end if
              if( nq>k ) then
                 ! p was determined by a call to stdlib${ii}$_${ci}$gebrd with nq > k
                 call stdlib${ii}$_${ci}$unmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, &
                           iinfo )
              else if( nq>1_${ik}$ ) then
                 ! p was determined by a call to stdlib${ii}$_${ci}$gebrd with nq <= k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ci}$unmlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ci}$unmbr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, &
     !! If VECT = 'Q', SORMBR: overwrites the general real M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      P * C          C * P
     !! TRANS = 'T':      P**T * C       C * P**T
     !! Here Q and P**T are the orthogonal matrices determined by SGEBRD when
     !! reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
     !! P**T are defined as products of elementary reflectors H(i) and G(i)
     !! respectively.
     !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
     !! order of the orthogonal matrix Q or P**T that is applied.
     !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
     !! if nq >= k, Q = H(1) H(2) . . . H(k);
     !! if nq < k, Q = H(1) H(2) . . . H(nq-1).
     !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
     !! if k < nq, P = G(1) G(2) . . . G(k);
     !! if k >= nq, P = G(1) G(2) . . . G(nq-1).
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: applyq, left, lquery, notran
           character :: transt
           integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           applyq = stdlib_lsame( vect, 'Q' )
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q or p and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -2_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( k<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,&
                      k ) ) ) )then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -11_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( applyq ) then
                 if( left ) then
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', side // trans, m-1, n, m-1,-1_${ik}$ )
                 else
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', side // trans, m, n-1, n-1,-1_${ik}$ )
                 end if
              else
                 if( left ) then
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', side // trans, m-1, n, m-1,-1_${ik}$ )
                 else
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', side // trans, m, n-1, n-1,-1_${ik}$ )
                 end if
              end if
              lwkopt = nw*nb
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORMBR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           work( 1_${ik}$ ) = 1_${ik}$
           if( m==0 .or. n==0 )return
           if( applyq ) then
              ! apply q
              if( nq>=k ) then
                 ! q was determined by a call to stdlib${ii}$_sgebrd with nq >= k
                 call stdlib${ii}$_sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo &
                           )
              else if( nq>1_${ik}$ ) then
                 ! q was determined by a call to stdlib${ii}$_sgebrd with nq < k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_sormqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           else
              ! apply p
              if( notran ) then
                 transt = 'T'
              else
                 transt = 'N'
              end if
              if( nq>k ) then
                 ! p was determined by a call to stdlib${ii}$_sgebrd with nq > k
                 call stdlib${ii}$_sormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, &
                           iinfo )
              else if( nq>1_${ik}$ ) then
                 ! p was determined by a call to stdlib${ii}$_sgebrd with nq <= k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_sormlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_sormbr

     pure module subroutine stdlib${ii}$_dormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, &
     !! If VECT = 'Q', DORMBR: overwrites the general real M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      P * C          C * P
     !! TRANS = 'T':      P**T * C       C * P**T
     !! Here Q and P**T are the orthogonal matrices determined by DGEBRD when
     !! reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
     !! P**T are defined as products of elementary reflectors H(i) and G(i)
     !! respectively.
     !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
     !! order of the orthogonal matrix Q or P**T that is applied.
     !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
     !! if nq >= k, Q = H(1) H(2) . . . H(k);
     !! if nq < k, Q = H(1) H(2) . . . H(nq-1).
     !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
     !! if k < nq, P = G(1) G(2) . . . G(k);
     !! if k >= nq, P = G(1) G(2) . . . G(nq-1).
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: applyq, left, lquery, notran
           character :: transt
           integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           applyq = stdlib_lsame( vect, 'Q' )
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q or p and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -2_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( k<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,&
                      k ) ) ) )then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -11_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( applyq ) then
                 if( left ) then
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m-1, n, m-1,-1_${ik}$ )
                 else
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n-1, n-1,-1_${ik}$ )
                 end if
              else
                 if( left ) then
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m-1, n, m-1,-1_${ik}$ )
                 else
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m, n-1, n-1,-1_${ik}$ )
                 end if
              end if
              lwkopt = nw*nb
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMBR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           work( 1_${ik}$ ) = 1_${ik}$
           if( m==0 .or. n==0 )return
           if( applyq ) then
              ! apply q
              if( nq>=k ) then
                 ! q was determined by a call to stdlib${ii}$_dgebrd with nq >= k
                 call stdlib${ii}$_dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo &
                           )
              else if( nq>1_${ik}$ ) then
                 ! q was determined by a call to stdlib${ii}$_dgebrd with nq < k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_dormqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           else
              ! apply p
              if( notran ) then
                 transt = 'T'
              else
                 transt = 'N'
              end if
              if( nq>k ) then
                 ! p was determined by a call to stdlib${ii}$_dgebrd with nq > k
                 call stdlib${ii}$_dormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, &
                           iinfo )
              else if( nq>1_${ik}$ ) then
                 ! p was determined by a call to stdlib${ii}$_dgebrd with nq <= k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_dormlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_dormbr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, &
     !! If VECT = 'Q', DORMBR: overwrites the general real M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C
     !! with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      P * C          C * P
     !! TRANS = 'T':      P**T * C       C * P**T
     !! Here Q and P**T are the orthogonal matrices determined by DGEBRD when
     !! reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
     !! P**T are defined as products of elementary reflectors H(i) and G(i)
     !! respectively.
     !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
     !! order of the orthogonal matrix Q or P**T that is applied.
     !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
     !! if nq >= k, Q = H(1) H(2) . . . H(k);
     !! if nq < k, Q = H(1) H(2) . . . H(nq-1).
     !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
     !! if k < nq, P = G(1) G(2) . . . G(k);
     !! if k >= nq, P = G(1) G(2) . . . G(nq-1).
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans, vect
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: applyq, left, lquery, notran
           character :: transt
           integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           applyq = stdlib_lsame( vect, 'Q' )
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q or p and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then
              info = -1_${ik}$
           else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -2_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -3_${ik}$
           else if( m<0_${ik}$ ) then
              info = -4_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( k<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,&
                      k ) ) ) )then
              info = -8_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -11_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( applyq ) then
                 if( left ) then
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m-1, n, m-1,-1_${ik}$ )
                 else
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n-1, n-1,-1_${ik}$ )
                 end if
              else
                 if( left ) then
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m-1, n, m-1,-1_${ik}$ )
                 else
                    nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m, n-1, n-1,-1_${ik}$ )
                 end if
              end if
              lwkopt = nw*nb
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMBR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           work( 1_${ik}$ ) = 1_${ik}$
           if( m==0 .or. n==0 )return
           if( applyq ) then
              ! apply q
              if( nq>=k ) then
                 ! q was determined by a call to stdlib${ii}$_${ri}$gebrd with nq >= k
                 call stdlib${ii}$_${ri}$ormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo &
                           )
              else if( nq>1_${ik}$ ) then
                 ! q was determined by a call to stdlib${ii}$_${ri}$gebrd with nq < k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$ormqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           else
              ! apply p
              if( notran ) then
                 transt = 'T'
              else
                 transt = 'N'
              end if
              if( nq>k ) then
                 ! p was determined by a call to stdlib${ii}$_${ri}$gebrd with nq > k
                 call stdlib${ii}$_${ri}$ormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, &
                           iinfo )
              else if( nq>1_${ik}$ ) then
                 ! p was determined by a call to stdlib${ii}$_${ri}$gebrd with nq <= k
                 if( left ) then
                    mi = m - 1_${ik}$
                    ni = n
                    i1 = 2_${ik}$
                    i2 = 1_${ik}$
                 else
                    mi = m
                    ni = n - 1_${ik}$
                    i1 = 1_${ik}$
                    i2 = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$ormlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), &
                           ldc, work, lwork, iinfo )
              end if
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ri}$ormbr

#:endif
#:endfor


#:endfor
end submodule stdlib_lapack_svd_comp