stdlib_lapack_orthogonal_factors_qr.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_orthogonal_factors) stdlib_lapack_orthogonal_factors_qr
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_sgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! SGEQR computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if ( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_sgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_slatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_sgeqr

     pure module subroutine stdlib${ii}$_dgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! DGEQR computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_dgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_dlatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_dgeqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! DGEQR: computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_${ri}$geqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_${ri}$latsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_${ri}$geqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! CGEQR computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_cgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_clatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_cgeqr

     pure module subroutine stdlib${ii}$_zgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! ZGEQR computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min ( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_zgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_zlatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_zgeqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! ZGEQR: computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min ( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_${ci}$geqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_${ci}$latsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_${ci}$geqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! SGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (SGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           real(sp), intent(in) :: a(lda,*), t(*)
           real(sp), intent(inout) :: c(ldc,*)
           real(sp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_sgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_sgemqr

     pure module subroutine stdlib${ii}$_dgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! DGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           real(dp), intent(in) :: a(lda,*), t(*)
           real(dp), intent(inout) :: c(ldc,*)
           real(dp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_dgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_dgemqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! DGEMQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           real(${rk}$), intent(in) :: a(lda,*), t(*)
           real(${rk}$), intent(inout) :: c(ldc,*)
           real(${rk}$), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_${ri}$gemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_${ri}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_${ri}$gemqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! CGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (CGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           complex(sp), intent(in) :: a(lda,*), t(*)
           complex(sp), intent(inout) :: c(ldc,*)
           complex(sp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_cgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_cgemqr

     pure module subroutine stdlib${ii}$_zgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! ZGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           complex(dp), intent(in) :: a(lda,*), t(*)
           complex(dp), intent(inout) :: c(ldc,*)
           complex(dp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_zgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_zgemqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! ZGEMQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           complex(${ck}$), intent(in) :: a(lda,*), t(*)
           complex(${ck}$), intent(inout) :: c(ldc,*)
           complex(${ck}$), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_${ci}$gemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_${ci}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_${ci}$gemqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! SGEQRF computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_sgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_slarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_sgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgeqrf

     pure module subroutine stdlib${ii}$_dgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! DGEQRF computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_dgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_dlarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_dgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgeqrf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqrf( m, n, a, lda, tau, work, lwork, info )
     !! DGEQRF: computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ri}$geqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ri}$larfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ri}$geqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$geqrf

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! CGEQRF computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_cgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_clarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_cgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cgeqrf

     pure module subroutine stdlib${ii}$_zgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_zgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_zlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_zgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zgeqrf

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqrf( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQRF: computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ci}$geqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ci}$larfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ci}$geqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$geqrf

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqr2( m, n, a, lda, tau, work, info )
     !! SGEQR2 computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgeqr2

     pure module subroutine stdlib${ii}$_dgeqr2( m, n, a, lda, tau, work, info )
     !! DGEQR2 computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgeqr2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqr2( m, n, a, lda, tau, work, info )
     !! DGEQR2: computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ri}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$geqr2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqr2( m, n, a, lda, tau, work, info )
     !! CGEQR2 computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_clarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgeqr2

     pure module subroutine stdlib${ii}$_zgeqr2( m, n, a, lda, tau, work, info )
     !! ZGEQR2 computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_zlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgeqr2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqr2( m, n, a, lda, tau, work, info )
     !! ZGEQR2: computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ci}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$geqr2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungqr( m, n, k, a, lda, tau, work, lwork, info )
     !! CUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by CGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CUNGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CUNGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to czero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_cung2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_clarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_cung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to czero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cungqr

     pure module subroutine stdlib${ii}$_zungqr( m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to czero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_zung2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_zlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_zung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to czero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zungqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungqr( m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGQR: generates an M-by-N complex matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to czero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_${ci}$ung2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ci}$larfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_${ci}$ung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to czero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$ungqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cung2r( m, n, k, a, lda, tau, work, info )
     !! CUNG2R generates an m by n complex matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by CGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = czero
              end do
              a( j, j ) = cone
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_cscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = cone - tau( i )
              ! set a(1:i-1,i) to czero
              do l = 1, i - 1
                 a( l, i ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_cung2r

     pure module subroutine stdlib${ii}$_zung2r( m, n, k, a, lda, tau, work, info )
     !! ZUNG2R generates an m by n complex matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = czero
              end do
              a( j, j ) = cone
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_zscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = cone - tau( i )
              ! set a(1:i-1,i) to czero
              do l = 1, i - 1
                 a( l, i ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_zung2r

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ung2r( m, n, k, a, lda, tau, work, info )
     !! ZUNG2R: generates an m by n complex matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = czero
              end do
              a( j, j ) = cone
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_${ci}$scal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = cone - tau( i )
              ! set a(1:i-1,i) to czero
              do l = 1, i - 1
                 a( l, i ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ci}$ung2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! CUNMQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**h is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_clarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_cunmqr

     pure module subroutine stdlib${ii}$_zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! ZUNMQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**h is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_zlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_zunmqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! ZUNMQR: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_${ci}$unm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**h is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_${ci}$larfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ci}$unmqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! CUNM2R overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           complex(sp) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = tau( i )
              else
                 taui = conjg( tau( i ) )
              end if
              aii = a( i, i )
              a( i, i ) = cone
              call stdlib${ii}$_clarf( side, mi, ni, a( i, i ), 1_${ik}$, taui, c( ic, jc ), ldc,work )
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_cunm2r

     pure module subroutine stdlib${ii}$_zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! ZUNM2R overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           complex(dp) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = tau( i )
              else
                 taui = conjg( tau( i ) )
              end if
              aii = a( i, i )
              a( i, i ) = cone
              call stdlib${ii}$_zlarf( side, mi, ni, a( i, i ), 1_${ik}$, taui, c( ic, jc ), ldc,work )
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_zunm2r

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! ZUNM2R: overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           complex(${ck}$) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = tau( i )
              else
                 taui = conjg( tau( i ) )
              end if
              aii = a( i, i )
              a( i, i ) = cone
              call stdlib${ii}$_${ci}$larf( side, mi, ni, a( i, i ), 1_${ik}$, taui, c( ic, jc ), ldc,work )
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_${ci}$unm2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgqr( m, n, k, a, lda, tau, work, lwork, info )
     !! SORGQR generates an M-by-N real matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by SGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SORGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SORGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_sorg2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_slarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_sorg2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sorgqr

     pure module subroutine stdlib${ii}$_dorgqr( m, n, k, a, lda, tau, work, lwork, info )
     !! DORGQR generates an M-by-N real matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_dorg2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_dlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_dorg2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dorgqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgqr( m, n, k, a, lda, tau, work, lwork, info )
     !! DORGQR: generates an M-by-N real matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_${ri}$org2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ri}$larfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_${ri}$org2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$orgqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorg2r( m, n, k, a, lda, tau, work, info )
     !! SORG2R generates an m by n real matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by SGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = zero
              end do
              a( j, j ) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_sscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = one - tau( i )
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a( l, i ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_sorg2r

     pure module subroutine stdlib${ii}$_dorg2r( m, n, k, a, lda, tau, work, info )
     !! DORG2R generates an m by n real matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = zero
              end do
              a( j, j ) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_dscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = one - tau( i )
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a( l, i ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_dorg2r

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$org2r( m, n, k, a, lda, tau, work, info )
     !! DORG2R: generates an m by n real matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = zero
              end do
              a( j, j ) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_${ri}$scal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = one - tau( i )
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a( l, i ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ri}$org2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! SORMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_slarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_sormqr

     pure module subroutine stdlib${ii}$_dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! DORMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_dlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_dormqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! DORMQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_${ri}$orm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_${ri}$larfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ri}$ormqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! SORM2R overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i)
              aii = a( i, i )
              a( i, i ) = one
              call stdlib${ii}$_slarf( side, mi, ni, a( i, i ), 1_${ik}$, tau( i ), c( ic, jc ),ldc, work )
                        
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_sorm2r

     pure module subroutine stdlib${ii}$_dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! DORM2R overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i)
              aii = a( i, i )
              a( i, i ) = one
              call stdlib${ii}$_dlarf( side, mi, ni, a( i, i ), 1_${ik}$, tau( i ), c( ic, jc ),ldc, work )
                        
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_dorm2r

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! DORM2R: overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i)
              aii = a( i, i )
              a( i, i ) = one
              call stdlib${ii}$_${ri}$larf( side, mi, ni, a( i, i ), 1_${ik}$, tau( i ), c( ic, jc ),ldc, work )
                        
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_${ri}$orm2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! SGEQRT computes a blocked QR factorization of a real M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_sgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_sgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**t to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_slarfb( 'L', 'T', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgeqrt

     pure module subroutine stdlib${ii}$_dgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! DGEQRT computes a blocked QR factorization of a real M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_dgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_dgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**t to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_dlarfb( 'L', 'T', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgeqrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! DGEQRT: computes a blocked QR factorization of a real M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_${ri}$geqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_${ri}$geqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**t to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_${ri}$larfb( 'L', 'T', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$geqrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! CGEQRT computes a blocked QR factorization of a complex M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_cgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_cgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**h to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_clarfb( 'L', 'C', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgeqrt

     pure module subroutine stdlib${ii}$_zgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! ZGEQRT computes a blocked QR factorization of a complex M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_zgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_zgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**h to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_zlarfb( 'L', 'C', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgeqrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! ZGEQRT: computes a blocked QR factorization of a complex M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_${ci}$geqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_${ci}$geqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**h to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_${ci}$larfb( 'L', 'C', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$geqrt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqrt2( m, n, a, lda, t, ldt, info )
     !! SGEQRT2 computes a QR factorization of a real M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_sgemv( 'T',m-i+1, n-i, one, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, zero, t( &
                           1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_sger( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), lda &
                           )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = one
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**t * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_sgemv( 'T', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, zero, t( 1_${ik}$, &
                        i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_strmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = zero
           end do
     end subroutine stdlib${ii}$_sgeqrt2

     pure module subroutine stdlib${ii}$_dgeqrt2( m, n, a, lda, t, ldt, info )
     !! DGEQRT2 computes a QR factorization of a real M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_dgemv( 'T',m-i+1, n-i, one, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, zero, t( &
                           1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_dger( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), lda &
                           )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = one
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**t * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_dgemv( 'T', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, zero, t( 1_${ik}$, &
                        i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_dtrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = zero
           end do
     end subroutine stdlib${ii}$_dgeqrt2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqrt2( m, n, a, lda, t, ldt, info )
     !! DGEQRT2: computes a QR factorization of a real M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_${ri}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_${ri}$gemv( 'T',m-i+1, n-i, one, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, zero, t( &
                           1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_${ri}$ger( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), lda &
                           )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = one
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**t * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_${ri}$gemv( 'T', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, zero, t( 1_${ik}$, &
                        i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_${ri}$trmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = zero
           end do
     end subroutine stdlib${ii}$_${ri}$geqrt2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqrt2( m, n, a, lda, t, ldt, info )
     !! CGEQRT2 computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_clarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = cone
                 ! w(1:n-i) := a(i:m,i+1:n)**h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_cgemv( 'C',m-i+1, n-i, cone, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, czero, t(&
                            1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)**h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_cgerc( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), &
                           lda )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = cone
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**h * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_cgemv( 'C', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, czero, t( 1_${ik}$,&
                         i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_ctrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = czero
           end do
     end subroutine stdlib${ii}$_cgeqrt2

     pure module subroutine stdlib${ii}$_zgeqrt2( m, n, a, lda, t, ldt, info )
     !! ZGEQRT2 computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_zlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = cone
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_zgemv( 'C',m-i+1, n-i, cone, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, czero, t(&
                            1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_zgerc( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), &
                           lda )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = cone
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**h * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_zgemv( 'C', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, czero, t( 1_${ik}$,&
                         i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_ztrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = czero
           end do
     end subroutine stdlib${ii}$_zgeqrt2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqrt2( m, n, a, lda, t, ldt, info )
     !! ZGEQRT2: computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_${ci}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = cone
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_${ci}$gemv( 'C',m-i+1, n-i, cone, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, czero, t(&
                            1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_${ci}$gerc( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), &
                           lda )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = cone
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**h * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_${ci}$gemv( 'C', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, czero, t( 1_${ik}$,&
                         i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_${ci}$trmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = czero
           end do
     end subroutine stdlib${ii}$_${ci}$geqrt2

#:endif
#:endfor



     pure recursive module subroutine stdlib${ii}$_sgeqrt3( m, n, a, lda, t, ldt, info )
     !! SGEQRT3 recursively computes a QR factorization of a real M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_slarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_sgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_sgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_strmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_sgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        one, a( j1, j1 ), lda )
              call stdlib${ii}$_strmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_sgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = (a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_sgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_strmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_sgeqrt3

     pure recursive module subroutine stdlib${ii}$_dgeqrt3( m, n, a, lda, t, ldt, info )
     !! DGEQRT3 recursively computes a QR factorization of a real M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_dlarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_dgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_dgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_dtrmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_dgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        one, a( j1, j1 ), lda )
              call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_dgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = (a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_dgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_dtrmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_dgeqrt3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure recursive module subroutine stdlib${ii}$_${ri}$geqrt3( m, n, a, lda, t, ldt, info )
     !! DGEQRT3: recursively computes a QR factorization of a real M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_${ri}$larfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_${ri}$geqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        one, a( j1, j1 ), lda )
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_${ri}$geqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = (a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_${ri}$geqrt3

#:endif
#:endfor

     pure recursive module subroutine stdlib${ii}$_cgeqrt3( m, n, a, lda, t, ldt, info )
     !! CGEQRT3 recursively computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_clarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1**h
              call stdlib${ii}$_cgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1**h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_cgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_ctrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        cone, a( j1, j1 ), lda )
              call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2**h
              call stdlib${ii}$_cgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1**h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = conjg(a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_cgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_ctrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_cgeqrt3

     pure recursive module subroutine stdlib${ii}$_zgeqrt3( m, n, a, lda, t, ldt, info )
     !! ZGEQRT3 recursively computes a QR factorization of a complex M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_zlarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_zgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_zgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_ztrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        cone, a( j1, j1 ), lda )
              call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_zgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = conjg(a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_zgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_ztrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_zgeqrt3

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure recursive module subroutine stdlib${ii}$_${ci}$geqrt3( m, n, a, lda, t, ldt, info )
     !! ZGEQRT3: recursively computes a QR factorization of a complex M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_${ci}$larfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_${ci}$geqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        cone, a( j1, j1 ), lda )
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_${ci}$geqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = conjg(a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_${ci}$geqrt3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! SGEMQRT overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'T':   Q**T C            C Q**T
     !! where Q is a real orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**T
     !! generated using the compact WY representation as returned by SGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           real(sp), intent(in) :: v(ldv,*), t(ldt,*)
           real(sp), intent(inout) :: c(ldc,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'L', 'T', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'R', 'T', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_sgemqrt

     pure module subroutine stdlib${ii}$_dgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! DGEMQRT overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'T':   Q**T C            C Q**T
     !! where Q is a real orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**T
     !! generated using the compact WY representation as returned by DGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           real(dp), intent(in) :: v(ldv,*), t(ldt,*)
           real(dp), intent(inout) :: c(ldc,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'L', 'T', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'R', 'T', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_dgemqrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! DGEMQRT: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'T':   Q**T C            C Q**T
     !! where Q is a real orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**T
     !! generated using the compact WY representation as returned by DGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           real(${rk}$), intent(in) :: v(ldv,*), t(ldt,*)
           real(${rk}$), intent(inout) :: c(ldc,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'L', 'T', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'R', 'T', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$gemqrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! CGEMQRT overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'C':    Q**H C            C Q**H
     !! where Q is a complex orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**H
     !! generated using the compact WY representation as returned by CGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           complex(sp), intent(in) :: v(ldv,*), t(ldt,*)
           complex(sp), intent(inout) :: c(ldc,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'L', 'C', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'R', 'C', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_cgemqrt

     pure module subroutine stdlib${ii}$_zgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! ZGEMQRT overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'C':    Q**H C            C Q**H
     !! where Q is a complex orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**H
     !! generated using the compact WY representation as returned by ZGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           complex(dp), intent(in) :: v(ldv,*), t(ldt,*)
           complex(dp), intent(inout) :: c(ldc,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'L', 'C', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'R', 'C', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_zgemqrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! ZGEMQRT: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'C':    Q**H C            C Q**H
     !! where Q is a complex orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**H
     !! generated using the compact WY representation as returned by ZGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           complex(${ck}$), intent(in) :: v(ldv,*), t(ldt,*)
           complex(${ck}$), intent(inout) :: c(ldc,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'L', 'C', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'R', 'C', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ci}$gemqrt

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! SGEQR2P computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_sgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_slarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_sgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgeqrfp

     module subroutine stdlib${ii}$_dgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! DGEQR2P computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_dgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_dlarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_dgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgeqrfp

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$geqrfp( m, n, a, lda, tau, work, lwork, info )
     !! DGEQR2P computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ri}$geqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ri}$larfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ri}$geqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$geqrfp

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! CGEQR2P computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_cgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_clarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_cgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cgeqrfp

     module subroutine stdlib${ii}$_zgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQR2P computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_zgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_zlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_zgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zgeqrfp

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$geqrfp( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQR2P computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ci}$geqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ci}$larfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ci}$geqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$geqrfp

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgeqr2p( m, n, a, lda, tau, work, info )
     !! SGEQR2P computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_slarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgeqr2p

     module subroutine stdlib${ii}$_dgeqr2p( m, n, a, lda, tau, work, info )
     !! DGEQR2P computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_dlarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgeqr2p

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$geqr2p( m, n, a, lda, tau, work, info )
     !! DGEQR2P: computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ri}$larfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$geqr2p

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgeqr2p( m, n, a, lda, tau, work, info )
     !! CGEQR2P computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_clarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgeqr2p

     module subroutine stdlib${ii}$_zgeqr2p( m, n, a, lda, tau, work, info )
     !! ZGEQR2P computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_zlarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgeqr2p

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$geqr2p( m, n, a, lda, tau, work, info )
     !! ZGEQR2P: computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ci}$larfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$geqr2p

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info )
     !! SGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = 3_${ik}$*n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = 2_${ik}$*n + ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_sswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_sgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_sgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_sorm2r( 'left', 'transpose', m, n-na, na, a, lda,
      ! cc  $                   tau, a( 1, na+1 ), lda, work, info )
                 call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', m, n-na, na, a, lda, tau,a( 1_${ik}$, na+1 ), &
                           lda, work, lwork, info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'SGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'SGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = 2_${ik}$*sn + ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = ( lwork-2*sn ) / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'SGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 work( j ) = stdlib${ii}$_snrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 work( n+j ) = work( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_slaqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , work( j ), work( n+j ),work( 2_${ik}$*n+1 ), work( 2_${ik}$*n+jb+1 ), n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_slaqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         work( j ), work( n+j ),work( 2_${ik}$*n+1 ) )
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgeqp3

     pure module subroutine stdlib${ii}$_dgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info )
     !! DGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = 3_${ik}$*n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = 2_${ik}$*n + ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_dswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_dgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_dgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_dorm2r( 'left', 'transpose', m, n-na, na, a, lda,
      ! cc  $                   tau, a( 1, na+1 ), lda, work, info )
                 call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', m, n-na, na, a, lda, tau,a( 1_${ik}$, na+1 ), &
                           lda, work, lwork, info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'DGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = 2_${ik}$*sn + ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = ( lwork-2*sn ) / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'DGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 work( j ) = stdlib${ii}$_dnrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 work( n+j ) = work( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_dlaqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , work( j ), work( n+j ),work( 2_${ik}$*n+1 ), work( 2_${ik}$*n+jb+1 ), n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_dlaqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         work( j ), work( n+j ),work( 2_${ik}$*n+1 ) )
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgeqp3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqp3( m, n, a, lda, jpvt, tau, work, lwork, info )
     !! DGEQP3: computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = 3_${ik}$*n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = 2_${ik}$*n + ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_${ri}$geqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_${ri}$geqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_${ri}$orm2r( 'left', 'transpose', m, n-na, na, a, lda,
      ! cc  $                   tau, a( 1, na+1 ), lda, work, info )
                 call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', m, n-na, na, a, lda, tau,a( 1_${ik}$, na+1 ), &
                           lda, work, lwork, info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'DGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = 2_${ik}$*sn + ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = ( lwork-2*sn ) / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'DGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 work( j ) = stdlib${ii}$_${ri}$nrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 work( n+j ) = work( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_${ri}$laqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , work( j ), work( n+j ),work( 2_${ik}$*n+1 ), work( 2_${ik}$*n+jb+1 ), n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_${ri}$laqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         work( j ), work( n+j ),work( 2_${ik}$*n+1 ) )
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$geqp3

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info )
     !! CGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(out) :: rwork(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp)
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_cswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_cgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_cgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_cunm2r( 'left', 'conjugate transpose', m, n-na,
      ! cc  $                   na, a, lda, tau, a( 1, na+1 ), lda, work,
      ! cc  $                   info )
                 call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, n-na, na, a,lda, tau, a( 1_${ik}$,&
                            na+1 ), lda, work, lwork,info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'CGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'CGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = lwork / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'CGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 rwork( j ) = stdlib${ii}$_scnrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 rwork( n+j ) = rwork( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_claqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , rwork( j ),rwork( n+j ), work( 1_${ik}$ ), work( jb+1 ),n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_claqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         rwork( j ), rwork( n+j ), work( 1_${ik}$ ) )
           end if
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cgeqp3

     pure module subroutine stdlib${ii}$_zgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info )
     !! ZGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(out) :: rwork(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp)
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_zswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_zgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_zgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_zunm2r( 'left', 'conjugate transpose', m, n-na,
      ! cc  $                   na, a, lda, tau, a( 1, na+1 ), lda, work,
      ! cc  $                   info )
                 call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, n-na, na, a,lda, tau, a( 1_${ik}$,&
                            na+1 ), lda, work, lwork,info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'ZGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = lwork / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'ZGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 rwork( j ) = stdlib${ii}$_dznrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 rwork( n+j ) = rwork( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_zlaqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , rwork( j ),rwork( n+j ), work( 1_${ik}$ ), work( jb+1 ),n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_zlaqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         rwork( j ), rwork( n+j ), work( 1_${ik}$ ) )
           end if
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zgeqp3

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info )
     !! ZGEQP3: computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(out) :: rwork(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$)
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_${ci}$geqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_${ci}$geqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_${ci}$unm2r( 'left', 'conjugate transpose', m, n-na,
      ! cc  $                   na, a, lda, tau, a( 1, na+1 ), lda, work,
      ! cc  $                   info )
                 call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, n-na, na, a,lda, tau, a( 1_${ik}$,&
                            na+1 ), lda, work, lwork,info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'ZGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = lwork / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'ZGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 rwork( j ) = stdlib${ii}$_${c2ri(ci)}$znrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 rwork( n+j ) = rwork( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_${ci}$laqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , rwork( j ),rwork( n+j ), work( 1_${ik}$ ), work( jb+1 ),n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_${ci}$laqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         rwork( j ), rwork( n+j ), work( 1_${ik}$ ) )
           end if
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$geqp3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! SLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*), vn1(*), vn2(*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(sp) :: aii, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_isamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_sswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_slarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_slarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**t to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,tau( i ), a( offpi, &
                           i+1 ), lda, work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_snrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_slaqp2

     pure module subroutine stdlib${ii}$_dlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! DLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*), vn1(*), vn2(*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(dp) :: aii, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_idamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_dswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_dlarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_dlarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**t to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,tau( i ), a( offpi, &
                           i+1 ), lda, work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_dnrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_dlaqp2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! DLAQP2: computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*), vn1(*), vn2(*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(${rk}$) :: aii, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_${ri}$lamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_i${ri}$amax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_${ri}$larfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_${ri}$larfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**t to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,tau( i ), a( offpi, &
                           i+1 ), lda, work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_${ri}$nrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_${ri}$laqp2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! CLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: vn1(*), vn2(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(sp) :: temp, temp2, tol3z
           complex(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_isamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_cswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_clarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_clarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**h to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,conjg( tau( i ) ), a(&
                            offpi, i+1 ), lda,work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_scnrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_claqp2

     pure module subroutine stdlib${ii}$_zlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! ZLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: vn1(*), vn2(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(dp) :: temp, temp2, tol3z
           complex(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_idamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_zswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_zlarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_zlarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**h to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,conjg( tau( i ) ), a(&
                            offpi, i+1 ), lda,work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_dznrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_zlaqp2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! ZLAQP2: computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(inout) :: vn1(*), vn2(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(${ck}$) :: temp, temp2, tol3z
           complex(${ck}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_${c2ri(ci)}$lamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_i${c2ri(ci)}$amax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_${ci}$larfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_${ci}$larfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**h to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,conjg( tau( i ) ), a(&
                            offpi, i+1 ), lda,work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_${ci}$laqp2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! SLAQPS computes a step of QR factorization with column pivoting
     !! of a real M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*)
           real(sp), intent(out) :: tau(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(sp) :: akk, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_isamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_sswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_sswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**t.
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ), &
                           ldf, one, a( rk, k ), 1_${ik}$ )
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_slarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_slarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = one
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**t*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_sgemv( 'TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), lda, a( rk, &
                           k ), 1_${ik}$, zero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = zero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**t
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_sgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1_${ik}$ ),lda, a( rk, k &
                           ), 1_${ik}$, zero, auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n, k-1, one, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, one,&
                            f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t.
              if( k<n ) then
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n-k, k, -one, f( k+1, 1_${ik}$ ), ldf,a( rk, 1_${ik}$ ), &
                           lda, one, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=sp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**t.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-rk, n-kb, kb, -one,a( rk+1, 1_${ik}$ ), &
                        lda, f( kb+1, 1_${ik}$ ), ldf, one,a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           40 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_snrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 40
           end if
           return
     end subroutine stdlib${ii}$_slaqps

     pure module subroutine stdlib${ii}$_dlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! DLAQPS computes a step of QR factorization with column pivoting
     !! of a real M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*)
           real(dp), intent(out) :: tau(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(dp) :: akk, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_idamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_dswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_dswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**t.
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ), &
                           ldf, one, a( rk, k ), 1_${ik}$ )
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_dlarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_dlarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = one
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**t*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_dgemv( 'TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), lda, a( rk, &
                           k ), 1_${ik}$, zero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = zero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**t
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_dgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1_${ik}$ ),lda, a( rk, k &
                           ), 1_${ik}$, zero, auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n, k-1, one, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, one,&
                            f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t.
              if( k<n ) then
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n-k, k, -one, f( k+1, 1_${ik}$ ), ldf,a( rk, 1_${ik}$ ), &
                           lda, one, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=dp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**t.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-rk, n-kb, kb, -one,a( rk+1, 1_${ik}$ ), &
                        lda, f( kb+1, 1_${ik}$ ), ldf, one,a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           40 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_dnrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 40
           end if
           return
     end subroutine stdlib${ii}$_dlaqps

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! DLAQPS: computes a step of QR factorization with column pivoting
     !! of a real M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*)
           real(${rk}$), intent(out) :: tau(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(${rk}$) :: akk, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_${ri}$lamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_i${ri}$amax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$swap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**t.
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ), &
                           ldf, one, a( rk, k ), 1_${ik}$ )
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_${ri}$larfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_${ri}$larfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = one
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**t*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), lda, a( rk, &
                           k ), 1_${ik}$, zero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = zero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**t
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1_${ik}$ ),lda, a( rk, k &
                           ), 1_${ik}$, zero, auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n, k-1, one, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, one,&
                            f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t.
              if( k<n ) then
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n-k, k, -one, f( k+1, 1_${ik}$ ), ldf,a( rk, 1_${ik}$ ), &
                           lda, one, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=${rk}$)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**t.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'TRANSPOSE', m-rk, n-kb, kb, -one,a( rk+1, 1_${ik}$ ), &
                        lda, f( kb+1, 1_${ik}$ ), ldf, one,a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           40 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_${ri}$nrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_dnrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_${ri}$lamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 40
           end if
           return
     end subroutine stdlib${ii}$_${ri}$laqps

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! CLAQPS computes a step of QR factorization with column pivoting
     !! of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: vn1(*), vn2(*)
           complex(sp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*)
           complex(sp), intent(out) :: tau(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(sp) :: temp, temp2, tol3z
           complex(sp) :: akk
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_isamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_cswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_cswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**h.
              if( k>1_${ik}$ ) then
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
                 call stdlib${ii}$_cgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ),&
                            ldf, cone, a( rk, k ), 1_${ik}$ )
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_clarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_clarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = cone
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**h*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), &
                           lda, a( rk, k ), 1_${ik}$, czero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = czero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**h
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1_${ik}$ ), lda,&
                            a( rk, k ), 1_${ik}$, czero,auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_cgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, &
                           cone, f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h.
              if( k<n ) then
                 call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', 1_${ik}$, n-k,k, -cone, a( rk,&
                            1_${ik}$ ), lda, f( k+1, 1_${ik}$ ), ldf,cone, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=sp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**h.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-rk, n-kb,kb, -cone, a( &
                        rk+1, 1_${ik}$ ), lda, f( kb+1, 1_${ik}$ ), ldf,cone, a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           60 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_scnrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 60
           end if
           return
     end subroutine stdlib${ii}$_claqps

     pure module subroutine stdlib${ii}$_zlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! ZLAQPS computes a step of QR factorization with column pivoting
     !! of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: vn1(*), vn2(*)
           complex(dp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*)
           complex(dp), intent(out) :: tau(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(dp) :: temp, temp2, tol3z
           complex(dp) :: akk
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_idamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_zswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_zswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**h.
              if( k>1_${ik}$ ) then
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
                 call stdlib${ii}$_zgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ),&
                            ldf, cone, a( rk, k ), 1_${ik}$ )
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_zlarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_zlarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = cone
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**h*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), &
                           lda, a( rk, k ), 1_${ik}$, czero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = czero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**h
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1_${ik}$ ), lda,&
                            a( rk, k ), 1_${ik}$, czero,auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_zgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, &
                           cone, f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h.
              if( k<n ) then
                 call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', 1_${ik}$, n-k,k, -cone, a( rk,&
                            1_${ik}$ ), lda, f( k+1, 1_${ik}$ ), ldf,cone, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=dp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**h.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-rk, n-kb,kb, -cone, a( &
                        rk+1, 1_${ik}$ ), lda, f( kb+1, 1_${ik}$ ), ldf,cone, a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           60 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_dznrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 60
           end if
           return
     end subroutine stdlib${ii}$_zlaqps

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! ZLAQPS: computes a step of QR factorization with column pivoting
     !! of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(inout) :: vn1(*), vn2(*)
           complex(${ck}$), intent(inout) :: a(lda,*), auxv(*), f(ldf,*)
           complex(${ck}$), intent(out) :: tau(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(${ck}$) :: temp, temp2, tol3z
           complex(${ck}$) :: akk
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_${c2ri(ci)}$lamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_i${c2ri(ci)}$amax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$swap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**h.
              if( k>1_${ik}$ ) then
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
                 call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ),&
                            ldf, cone, a( rk, k ), 1_${ik}$ )
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_${ci}$larfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_${ci}$larfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = cone
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**h*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), &
                           lda, a( rk, k ), 1_${ik}$, czero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = czero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**h
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1_${ik}$ ), lda,&
                            a( rk, k ), 1_${ik}$, czero,auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', n, k-1, cone, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, &
                           cone, f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h.
              if( k<n ) then
                 call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', 1_${ik}$, n-k,k, -cone, a( rk,&
                            1_${ik}$ ), lda, f( k+1, 1_${ik}$ ), ldf,cone, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=${ck}$)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**h.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-rk, n-kb,kb, -cone, a( &
                        rk+1, 1_${ik}$ ), lda, f( kb+1, 1_${ik}$ ), ldf,cone, a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           60 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_dnrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_${c2ri(ci)}$lamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 60
           end if
           return
     end subroutine stdlib${ii}$_${ci}$laqps

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! SLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a real M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_sgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_sgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_stpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_stpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_slatsqr

     pure module subroutine stdlib${ii}$_dlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! DLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a real M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_dgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_dgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_dtpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_dtpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_dlatsqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$latsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! DLATSQR: computes a blocked Tall-Skinny QR factorization of
     !! a real M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_${ri}$geqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_${ri}$geqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_${ri}$tpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_${ri}$tpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_${ri}$latsqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! CLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a complex M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_cgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_cgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_ctpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$,ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_ctpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_clatsqr

     pure module subroutine stdlib${ii}$_zlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! ZLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a complex M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_zgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_zgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_ztpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_ztpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$,ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_zlatsqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$latsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! ZLATSQR: computes a blocked Tall-Skinny QR factorization of
     !! a complex M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_${ci}$geqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_${ci}$geqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_${ci}$tpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_${ci}$tpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$,ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_${ci}$latsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! CUNGTSQR generates an M-by-N complex matrix Q_out with orthonormal
     !! columns, which are the first N columns of a product of comlpex unitary
     !! matrices of order M which are returned by CLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for CLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: t(ldt,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_clamtsqr. see the documentation for stdlib${ii}$_clamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_clamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_clamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_clamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_claset( 'F', m, n, czero, cone, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_clamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_ccopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cungtsqr

     pure module subroutine stdlib${ii}$_zungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! ZUNGTSQR generates an M-by-N complex matrix Q_out with orthonormal
     !! columns, which are the first N columns of a product of comlpex unitary
     !! matrices of order M which are returned by ZLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for ZLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: t(ldt,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_zlamtsqr. see the documentation for stdlib${ii}$_zlamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_zlamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_zlamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_zlamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_zlaset( 'F', m, n, czero, cone, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_zlamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_zcopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zungtsqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! ZUNGTSQR: generates an M-by-N complex matrix Q_out with orthonormal
     !! columns, which are the first N columns of a product of comlpex unitary
     !! matrices of order M which are returned by ZLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for ZLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: t(ldt,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_${ci}$lamtsqr. see the documentation for stdlib${ii}$_${ci}$lamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_${ci}$lamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_${ci}$lamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_${ci}$lamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, cone, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_${ci}$lamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ci}$copy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$ungtsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! CUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
     !! orthonormal columns from the output of CLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by CLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of CLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine CLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which CLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: t(ldt,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           complex(sp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_claset('U', m, n, czero, cone, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_clarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_clarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_clarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cungtsqr_row

     pure module subroutine stdlib${ii}$_zungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! ZUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
     !! orthonormal columns from the output of ZLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by ZLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of ZLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine ZLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which ZLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: t(ldt,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           complex(dp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_zlaset('U', m, n, czero, cone, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_zlarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_zlarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_zlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zungtsqr_row

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! ZUNGTSQR_ROW: generates an M-by-N complex matrix Q_out with
     !! orthonormal columns from the output of ZLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by ZLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of ZLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine ZLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which ZLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: t(ldt,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           complex(${ck}$) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_${ci}$laset('U', m, n, czero, cone, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_${ci}$larfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_dlarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_${ci}$larfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_${ci}$larfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$ungtsqr_row

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! SORGTSQR generates an M-by-N real matrix Q_out with orthonormal columns,
     !! which are the first N columns of a product of real orthogonal
     !! matrices of order M which are returned by SLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for SLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: t(ldt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_slamtsqr. see the documentation for stdlib${ii}$_slamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_slamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_slamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_slamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_slaset( 'F', m, n, zero, one, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_slamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_scopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sorgtsqr

     pure module subroutine stdlib${ii}$_dorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! DORGTSQR generates an M-by-N real matrix Q_out with orthonormal columns,
     !! which are the first N columns of a product of real orthogonal
     !! matrices of order M which are returned by DLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for DLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: t(ldt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_dlamtsqr. see the documentation for stdlib${ii}$_dlamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_dlamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_dlamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_dlamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_dlaset( 'F', m, n, zero, one, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_dlamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_dcopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dorgtsqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! DORGTSQR: generates an M-by-N real matrix Q_out with orthonormal columns,
     !! which are the first N columns of a product of real orthogonal
     !! matrices of order M which are returned by DLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for DLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_${ri}$lamtsqr. see the documentation for stdlib${ii}$_${ri}$lamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_${ri}$lamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_${ri}$lamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_${ri}$lamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, one, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_${ri}$lamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ri}$copy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$orgtsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! SORGTSQR_ROW generates an M-by-N real matrix Q_out with
     !! orthonormal columns from the output of SLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by SLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of SLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine SLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which SLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: t(ldt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           real(sp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_slaset('U', m, n, zero, one, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_slarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_slarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_slarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sorgtsqr_row

     pure module subroutine stdlib${ii}$_dorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! DORGTSQR_ROW generates an M-by-N real matrix Q_out with
     !! orthonormal columns from the output of DLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by DLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of DLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine DLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which DLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: t(ldt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           real(dp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_dlaset('U', m, n, zero, one, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_dlarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_dlarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_dlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dorgtsqr_row

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! DORGTSQR_ROW: generates an M-by-N real matrix Q_out with
     !! orthonormal columns from the output of DLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by DLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of DLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine DLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which DLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           real(${rk}$) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_${ri}$laset('U', m, n, zero, one, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_${ri}$larfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_dlarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_${ri}$larfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_${ri}$larfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$orgtsqr_row

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! SLARFB_GETT applies a real Householder block reflector H from the
     !! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(in) :: t(ldt,*)
           real(sp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_scopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_strmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1_${ik}$, k+1 ), ldb, one, work, &
                           ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1_${ik}$, k+&
                           1_${ik}$ ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_strmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_scopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = zero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_strmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_strmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_strmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_slarfb_gett

     pure module subroutine stdlib${ii}$_dlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! DLARFB_GETT applies a real Householder block reflector H from the
     !! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(in) :: t(ldt,*)
           real(dp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_dcopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1_${ik}$, k+1 ), ldb, one, work, &
                           ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1_${ik}$, k+&
                           1_${ik}$ ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_dcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = zero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_dtrmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_dlarfb_gett

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$larfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! DLARFB_GETT: applies a real Householder block reflector H from the
     !! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(in) :: t(ldt,*)
           real(${rk}$), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_${ri}$copy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1_${ik}$, k+1 ), ldb, one, work, &
                           ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1_${ik}$, k+&
                           1_${ik}$ ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_${ri}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = zero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ri}$larfb_gett

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! CLARFB_GETT applies a complex Householder block reflector H from the
     !! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(in) :: t(ldt,*)
           complex(sp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_ccopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1_${ik}$, k+1 ), ldb, cone, &
                           work, ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1_${ik}$, &
                           k+1 ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_ccopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = czero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_ctrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_clarfb_gett

     pure module subroutine stdlib${ii}$_zlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! ZLARFB_GETT applies a complex Householder block reflector H from the
     !! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(in) :: t(ldt,*)
           complex(dp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_zcopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1_${ik}$, k+1 ), ldb, cone, &
                           work, ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1_${ik}$, &
                           k+1 ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_zcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = czero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_ztrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_zlarfb_gett

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$larfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! ZLARFB_GETT: applies a complex Householder block reflector H from the
     !! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(in) :: t(ldt,*)
           complex(${ck}$), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_${ci}$copy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1_${ik}$, k+1 ), ldb, cone, &
                           work, ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1_${ik}$, &
                           k+1 ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_${ci}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = czero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ci}$larfb_gett

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! SLAMTSQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (SLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           real(sp), intent(in) :: a(lda,*), t(ldt,*)
           real(sp), intent(out) :: work(*)
           real(sp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = mb * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_sgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_stpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt , c(1_${ik}$,&
                          1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_stpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt,&
                           c(1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_sgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_sgemqrt('L','T',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_stpmqrt('L','T',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_stpmqrt('L','T',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_stpmqrt('R','T',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_stpmqrt('R','T',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_sgemqrt('R','T',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_sgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_stpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_stpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_slamtsqr

     pure module subroutine stdlib${ii}$_dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! DLAMTSQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           real(dp), intent(in) :: a(lda,*), t(ldt,*)
           real(dp), intent(out) :: work(*)
           real(dp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = mb * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_dgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_dtpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt , c(1_${ik}$,&
                          1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_dtpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,&
                          1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_dgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_dgemqrt('L','T',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_dtpmqrt('L','T',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_dtpmqrt('L','T',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_dtpmqrt('R','T',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_dtpmqrt('R','T',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_dgemqrt('R','T',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_dgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_dtpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_dtpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_dlamtsqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! DLAMTSQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           real(${rk}$), intent(in) :: a(lda,*), t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
           real(${rk}$), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = mb * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_${ri}$gemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_${ri}$tpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt , c(1_${ik}$,&
                          1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_${ri}$tpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,&
                          1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_${ri}$gemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ri}$gemqrt('L','T',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_${ri}$tpmqrt('L','T',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ri}$tpmqrt('L','T',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_${ri}$tpmqrt('R','T',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_${ri}$tpmqrt('R','T',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_${ri}$gemqrt('R','T',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ri}$gemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_${ri}$tpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ri}$tpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_${ri}$lamtsqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! CLAMTSQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (CLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           complex(sp), intent(in) :: a(lda,*), t(ldt,*)
           complex(sp), intent(out) :: work(*)
           complex(sp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = m * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_cgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_ctpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr*k+1),ldt , c(&
                          1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_ctpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,&
                          1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_cgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_cgemqrt('L','C',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_ctpmqrt('L','C',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr*k+1),ldt, c(1_${ik}$,&
                         1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ctpmqrt('L','C',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(1_${ik}$,1_${ik}$)&
                         , ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_ctpmqrt('R','C',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_ctpmqrt('R','C',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_cgemqrt('R','C',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_cgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_ctpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,1_${ik}$)&
                         , ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ctpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,1_${ik}$)&
                         , ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_clamtsqr

     pure module subroutine stdlib${ii}$_zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! ZLAMTSQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           complex(dp), intent(in) :: a(lda,*), t(ldt,*)
           complex(dp), intent(out) :: work(*)
           complex(dp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = m * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_zgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_ztpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt ,&
                           c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_ztpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, &
                          c(1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_zgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_zgemqrt('L','C',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_ztpmqrt('L','C',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ztpmqrt('L','C',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_ztpmqrt('R','C',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt,&
                            c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_ztpmqrt('R','C',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_zgemqrt('R','C',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_zgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_ztpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ztpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_zlamtsqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! ZLAMTSQR: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           complex(${ck}$), intent(in) :: a(lda,*), t(ldt,*)
           complex(${ck}$), intent(out) :: work(*)
           complex(${ck}$), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = m * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_${ci}$gemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_${ci}$tpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt ,&
                           c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_${ci}$tpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, &
                          c(1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_${ci}$gemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ci}$gemqrt('L','C',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_${ci}$tpmqrt('L','C',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ci}$tpmqrt('L','C',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_${ci}$tpmqrt('R','C',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt,&
                            c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_${ci}$tpmqrt('R','C',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_${ci}$gemqrt('R','C',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ci}$gemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_${ci}$tpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ci}$tpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_${ci}$lamtsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! SGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a complex M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in SGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of SGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_slatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_sorgtsqr_row;
              ! d) diagonal d for stdlib${ii}$_sorhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=sp) / real( mb1 - n,&
                           KIND=sp) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_sorgtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_slatsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_scopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_sorgtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_sorhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_sorhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-one ) then
                 do j = i, n
                    a( i, j ) = -one * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_scopy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sgetsqrhrt

     pure module subroutine stdlib${ii}$_dgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! DGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a real M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in DGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of DGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_dlatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_dorgtsqr_row;
              ! d) diagonal d for stdlib${ii}$_dorhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=dp) / real( mb1 - n,&
                           KIND=dp) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_dorgtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_dlatsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_dcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_dorgtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_dorhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_dorhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-one ) then
                 do j = i, n
                    a( i, j ) = -one * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_dcopy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dgetsqrhrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$getsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! DGETSQRHRT: computes a NB2-sized column blocked QR-factorization
     !! of a real M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in DGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of DGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_${ri}$latsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_${ri}$orgtsqr_row;
              ! d) diagonal d for stdlib${ii}$_${ri}$orhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=${rk}$) / real( mb1 - n,&
                           KIND=${rk}$) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_${ri}$orgtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_${ri}$latsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ri}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_${ri}$orgtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_${ri}$orhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_${ri}$orhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-one ) then
                 do j = i, n
                    a( i, j ) = -one * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_${ri}$copy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$getsqrhrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! CGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a complex M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in CGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of CGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_clatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_cungtsqr_row;
              ! d) diagonal d for stdlib${ii}$_cunhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=sp) / real( mb1 - n,&
                           KIND=sp) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_cungtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_clatsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_ccopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_cungtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_cunhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_cunhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-cone ) then
                 do j = i, n
                    a( i, j ) = -cone * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_ccopy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cgetsqrhrt

     pure module subroutine stdlib${ii}$_zgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! ZGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a complex M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in ZGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of ZGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_zlatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_zungtsqr_row;
              ! d) diagonal d for stdlib${ii}$_zunhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=dp) / real( mb1 - n,&
                           KIND=dp) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_zungtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_zlatsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_zcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_zungtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_zunhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_zunhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-cone ) then
                 do j = i, n
                    a( i, j ) = -cone * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_zcopy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zgetsqrhrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$getsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! ZGETSQRHRT: computes a NB2-sized column blocked QR-factorization
     !! of a complex M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in ZGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of ZGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_${ci}$latsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_${ci}$ungtsqr_row;
              ! d) diagonal d for stdlib${ii}$_${ci}$unhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=${ck}$) / real( mb1 - n,&
                           KIND=${ck}$) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_${ci}$ungtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_${ci}$latsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ci}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_${ci}$ungtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_${ci}$unhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_${ci}$unhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-cone ) then
                 do j = i, n
                    a( i, j ) = -cone * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_${ci}$copy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$getsqrhrt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunhr_col( m, n, nb, a, lda, t, ldt, d, info )
     !! CUNHR_COL takes an M-by-N complex matrix Q_in with orthonormal columns
     !! as input, stored in A, and performs Householder Reconstruction (HR),
     !! i.e. reconstructs Householder vectors V(i) implicitly representing
     !! another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
     !! where S is an N-by-N diagonal matrix with diagonal entries
     !! equal to +1 or -1. The Householder vectors (columns V(i) of V) are
     !! stored in A on output, and the diagonal entries of S are stored in D.
     !! Block reflectors are also returned in T
     !! (same output format as CGEQRT).
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: d(*), t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -7_${ik}$
           end if
           ! handle error in the input parameters.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNHR_COL', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              return
           end if
           ! on input, the m-by-n matrix a contains the unitary
           ! m-by-n matrix q_in.
           ! (1) compute the unit lower-trapezoidal v (ones on the diagonal
           ! are not stored) by performing the "modified" lu-decomposition.
           ! q_in - ( s ) = v * u = ( v1 ) * u,
                  ! ( 0 )           ( v2 )
           ! where 0 is an (m-n)-by-n zero matrix.
           ! (1-1) factor v1 and u.
           call stdlib${ii}$_claunhr_col_getrfnp( n, n, a, lda, d, iinfo )
           ! (1-2) solve for v2.
           if( m>n ) then
              call stdlib${ii}$_ctrsm( 'R', 'U', 'N', 'N', m-n, n, cone, a, lda,a( n+1, 1_${ik}$ ), lda )
                        
           end if
           ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n)
           ! as a sequence of upper-triangular blocks with nb-size column
           ! blocking.
           ! loop over the column blocks of size nb of the array a(1:m,1:n)
           ! and the array t(1:nb,1:n), jb is the column index of a column
           ! block, jnb is the column block size at each step jb.
           nplusone = n + 1_${ik}$
           do jb = 1, n, nb
              ! (2-0) determine the column block size jnb.
              jnb = min( nplusone-jb, nb )
              ! (2-1) copy the upper-triangular part of the current jnb-by-jnb
              ! diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part
              ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1)
              ! column-by-column, total jnb*(jnb+1)/2 elements.
              jbtemp1 = jb - 1_${ik}$
              do j = jb, jb+jnb-1
                 call stdlib${ii}$_ccopy( j-jbtemp1, a( jb, j ), 1_${ik}$, t( 1_${ik}$, j ), 1_${ik}$ )
              end do
              ! (2-2) perform on the upper-triangular part of the current
              ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in t(1:jnb,jb:jb+jnb-1) the following operation in place:
              ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper-
              ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication
              ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb
              ! diagonal block s(jb) of the n-by-n sign matrix s from the
              ! right means changing the sign of each j-th column of the block
              ! u(jb) according to the sign of the diagonal element of the block
              ! s(jb), i.e. s(j,j) that is stored in the array element d(j).
              do j = jb, jb+jnb-1
                 if( d( j )==cone ) then
                    call stdlib${ii}$_cscal( j-jbtemp1, -cone, t( 1_${ik}$, j ), 1_${ik}$ )
                 end if
              end do
              ! (2-3) perform the triangular solve for the current block
              ! matrix x(jb):
                     ! x(jb) * (a(jb)**t) = b(jb), where:
                     ! a(jb)**t  is a jnb-by-jnb unit upper-triangular
                               ! coefficient block, and a(jb)=v1(jb), which
                               ! is a jnb-by-jnb unit lower-triangular block
                               ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1).
                               ! the n-by-n matrix v1 is the upper part
                               ! of the m-by-n lower-trapezoidal matrix v
                               ! stored in a(1:m,1:n);
                     ! b(jb)     is a jnb-by-jnb  upper-triangular right-hand
                               ! side block, b(jb) = (-1)*u(jb)*s(jb), and
                               ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1);
                     ! x(jb)     is a jnb-by-jnb upper-triangular solution
                               ! block, x(jb) is the upper-triangular block
                               ! reflector t(jb), and x(jb) is stored
                               ! in t(1:jnb,jb:jb+jnb-1).
                   ! in other words, we perform the triangular solve for the
                   ! upper-triangular block t(jb):
                     ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb).
                   ! even though the blocks x(jb) and b(jb) are upper-
                   ! triangular, the routine stdlib${ii}$_ctrsm will access all jnb**2
                   ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore,
                   ! we need to set to zero the elements of the block
                   ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call
                   ! to stdlib${ii}$_ctrsm.
              ! (2-3a) set the elements to zero.
              jbtemp2 = jb - 2_${ik}$
              do j = jb, jb+jnb-2
                 do i = j-jbtemp2, nb
                    t( i, j ) = czero
                 end do
              end do
              ! (2-3b) perform the triangular solve.
              call stdlib${ii}$_ctrsm( 'R', 'L', 'C', 'U', jnb, jnb, cone,a( jb, jb ), lda, t( 1_${ik}$, jb ), &
                        ldt )
           end do
           return
     end subroutine stdlib${ii}$_cunhr_col

     pure module subroutine stdlib${ii}$_zunhr_col( m, n, nb, a, lda, t, ldt, d, info )
     !! ZUNHR_COL takes an M-by-N complex matrix Q_in with orthonormal columns
     !! as input, stored in A, and performs Householder Reconstruction (HR),
     !! i.e. reconstructs Householder vectors V(i) implicitly representing
     !! another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
     !! where S is an N-by-N diagonal matrix with diagonal entries
     !! equal to +1 or -1. The Householder vectors (columns V(i) of V) are
     !! stored in A on output, and the diagonal entries of S are stored in D.
     !! Block reflectors are also returned in T
     !! (same output format as ZGEQRT).
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: d(*), t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -7_${ik}$
           end if
           ! handle error in the input parameters.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNHR_COL', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              return
           end if
           ! on input, the m-by-n matrix a contains the unitary
           ! m-by-n matrix q_in.
           ! (1) compute the unit lower-trapezoidal v (ones on the diagonal
           ! are not stored) by performing the "modified" lu-decomposition.
           ! q_in - ( s ) = v * u = ( v1 ) * u,
                  ! ( 0 )           ( v2 )
           ! where 0 is an (m-n)-by-n zero matrix.
           ! (1-1) factor v1 and u.
           call stdlib${ii}$_zlaunhr_col_getrfnp( n, n, a, lda, d, iinfo )
           ! (1-2) solve for v2.
           if( m>n ) then
              call stdlib${ii}$_ztrsm( 'R', 'U', 'N', 'N', m-n, n, cone, a, lda,a( n+1, 1_${ik}$ ), lda )
                        
           end if
           ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n)
           ! as a sequence of upper-triangular blocks with nb-size column
           ! blocking.
           ! loop over the column blocks of size nb of the array a(1:m,1:n)
           ! and the array t(1:nb,1:n), jb is the column index of a column
           ! block, jnb is the column block size at each step jb.
           nplusone = n + 1_${ik}$
           do jb = 1, n, nb
              ! (2-0) determine the column block size jnb.
              jnb = min( nplusone-jb, nb )
              ! (2-1) copy the upper-triangular part of the current jnb-by-jnb
              ! diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part
              ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1)
              ! column-by-column, total jnb*(jnb+1)/2 elements.
              jbtemp1 = jb - 1_${ik}$
              do j = jb, jb+jnb-1
                 call stdlib${ii}$_zcopy( j-jbtemp1, a( jb, j ), 1_${ik}$, t( 1_${ik}$, j ), 1_${ik}$ )
              end do
              ! (2-2) perform on the upper-triangular part of the current
              ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in t(1:jnb,jb:jb+jnb-1) the following operation in place:
              ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper-
              ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication
              ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb
              ! diagonal block s(jb) of the n-by-n sign matrix s from the
              ! right means changing the sign of each j-th column of the block
              ! u(jb) according to the sign of the diagonal element of the block
              ! s(jb), i.e. s(j,j) that is stored in the array element d(j).
              do j = jb, jb+jnb-1
                 if( d( j )==cone ) then
                    call stdlib${ii}$_zscal( j-jbtemp1, -cone, t( 1_${ik}$, j ), 1_${ik}$ )
                 end if
              end do
              ! (2-3) perform the triangular solve for the current block
              ! matrix x(jb):
                     ! x(jb) * (a(jb)**t) = b(jb), where:
                     ! a(jb)**t  is a jnb-by-jnb unit upper-triangular
                               ! coefficient block, and a(jb)=v1(jb), which
                               ! is a jnb-by-jnb unit lower-triangular block
                               ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1).
                               ! the n-by-n matrix v1 is the upper part
                               ! of the m-by-n lower-trapezoidal matrix v
                               ! stored in a(1:m,1:n);
                     ! b(jb)     is a jnb-by-jnb  upper-triangular right-hand
                               ! side block, b(jb) = (-1)*u(jb)*s(jb), and
                               ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1);
                     ! x(jb)     is a jnb-by-jnb upper-triangular solution
                               ! block, x(jb) is the upper-triangular block
                               ! reflector t(jb), and x(jb) is stored
                               ! in t(1:jnb,jb:jb+jnb-1).
                   ! in other words, we perform the triangular solve for the
                   ! upper-triangular block t(jb):
                     ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb).
                   ! even though the blocks x(jb) and b(jb) are upper-
                   ! triangular, the routine stdlib${ii}$_ztrsm will access all jnb**2
                   ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore,
                   ! we need to set to zero the elements of the block
                   ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call
                   ! to stdlib${ii}$_ztrsm.
              ! (2-3a) set the elements to zero.
              jbtemp2 = jb - 2_${ik}$
              do j = jb, jb+jnb-2
                 do i = j-jbtemp2, nb
                    t( i, j ) = czero
                 end do
              end do
              ! (2-3b) perform the triangular solve.
              call stdlib${ii}$_ztrsm( 'R', 'L', 'C', 'U', jnb, jnb, cone,a( jb, jb ), lda, t( 1_${ik}$, jb ), &
                        ldt )
           end do
           return
     end subroutine stdlib${ii}$_zunhr_col

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unhr_col( m, n, nb, a, lda, t, ldt, d, info )
     !! ZUNHR_COL: takes an M-by-N complex matrix Q_in with orthonormal columns
     !! as input, stored in A, and performs Householder Reconstruction (HR),
     !! i.e. reconstructs Householder vectors V(i) implicitly representing
     !! another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
     !! where S is an N-by-N diagonal matrix with diagonal entries
     !! equal to +1 or -1. The Householder vectors (columns V(i) of V) are
     !! stored in A on output, and the diagonal entries of S are stored in D.
     !! Block reflectors are also returned in T
     !! (same output format as ZGEQRT).
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: d(*), t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -7_${ik}$
           end if
           ! handle error in the input parameters.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNHR_COL', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              return
           end if
           ! on input, the m-by-n matrix a contains the unitary
           ! m-by-n matrix q_in.
           ! (1) compute the unit lower-trapezoidal v (ones on the diagonal
           ! are not stored) by performing the "modified" lu-decomposition.
           ! q_in - ( s ) = v * u = ( v1 ) * u,
                  ! ( 0 )           ( v2 )
           ! where 0 is an (m-n)-by-n zero matrix.
           ! (1-1) factor v1 and u.
           call stdlib${ii}$_${ci}$launhr_col_getrfnp( n, n, a, lda, d, iinfo )
           ! (1-2) solve for v2.
           if( m>n ) then
              call stdlib${ii}$_${ci}$trsm( 'R', 'U', 'N', 'N', m-n, n, cone, a, lda,a( n+1, 1_${ik}$ ), lda )
                        
           end if
           ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n)
           ! as a sequence of upper-triangular blocks with nb-size column
           ! blocking.
           ! loop over the column blocks of size nb of the array a(1:m,1:n)
           ! and the array t(1:nb,1:n), jb is the column index of a column
           ! block, jnb is the column block size at each step jb.
           nplusone = n + 1_${ik}$
           do jb = 1, n, nb
              ! (2-0) determine the column block size jnb.
              jnb = min( nplusone-jb, nb )
              ! (2-1) copy the upper-triangular part of the current jnb-by-jnb
              ! diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part
              ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1)
              ! column-by-column, total jnb*(jnb+1)/2 elements.
              jbtemp1 = jb - 1_${ik}$
              do j = jb, jb+jnb-1
                 call stdlib${ii}$_${ci}$copy( j-jbtemp1, a( jb, j ), 1_${ik}$, t( 1_${ik}$, j ), 1_${ik}$ )
              end do
              ! (2-2) perform on the upper-triangular part of the current
              ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in t(1:jnb,jb:jb+jnb-1) the following operation in place:
              ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper-
              ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication
              ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb
              ! diagonal block s(jb) of the n-by-n sign matrix s from the
              ! right means changing the sign of each j-th column of the block
              ! u(jb) according to the sign of the diagonal element of the block
              ! s(jb), i.e. s(j,j) that is stored in the array element d(j).
              do j = jb, jb+jnb-1
                 if( d( j )==cone ) then
                    call stdlib${ii}$_${ci}$scal( j-jbtemp1, -cone, t( 1_${ik}$, j ), 1_${ik}$ )
                 end if
              end do
              ! (2-3) perform the triangular solve for the current block
              ! matrix x(jb):
                     ! x(jb) * (a(jb)**t) = b(jb), where:
                     ! a(jb)**t  is a jnb-by-jnb unit upper-triangular
                               ! coefficient block, and a(jb)=v1(jb), which
                               ! is a jnb-by-jnb unit lower-triangular block
                               ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1).
                               ! the n-by-n matrix v1 is the upper part
                               ! of the m-by-n lower-trapezoidal matrix v
                               ! stored in a(1:m,1:n);
                     ! b(jb)     is a jnb-by-jnb  upper-triangular right-hand
                               ! side block, b(jb) = (-1)*u(jb)*s(jb), and
                               ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1);
                     ! x(jb)     is a jnb-by-jnb upper-triangular solution
                               ! block, x(jb) is the upper-triangular block
                               ! reflector t(jb), and x(jb) is stored
                               ! in t(1:jnb,jb:jb+jnb-1).
                   ! in other words, we perform the triangular solve for the
                   ! upper-triangular block t(jb):
                     ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb).
                   ! even though the blocks x(jb) and b(jb) are upper-
                   ! triangular, the routine stdlib${ii}$_${ci}$trsm will access all jnb**2
                   ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore,
                   ! we need to set to zero the elements of the block
                   ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call
                   ! to stdlib${ii}$_${ci}$trsm.
              ! (2-3a) set the elements to zero.
              jbtemp2 = jb - 2_${ik}$
              do j = jb, jb+jnb-2
                 do i = j-jbtemp2, nb
                    t( i, j ) = czero
                 end do
              end do
              ! (2-3b) perform the triangular solve.
              call stdlib${ii}$_${ci}$trsm( 'R', 'L', 'C', 'U', jnb, jnb, cone,a( jb, jb ), lda, t( 1_${ik}$, jb ), &
                        ldt )
           end do
           return
     end subroutine stdlib${ii}$_${ci}$unhr_col

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorhr_col( m, n, nb, a, lda, t, ldt, d, info )
     !! SORHR_COL takes an M-by-N real matrix Q_in with orthonormal columns
     !! as input, stored in A, and performs Householder Reconstruction (HR),
     !! i.e. reconstructs Householder vectors V(i) implicitly representing
     !! another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
     !! where S is an N-by-N diagonal matrix with diagonal entries
     !! equal to +1 or -1. The Householder vectors (columns V(i) of V) are
     !! stored in A on output, and the diagonal entries of S are stored in D.
     !! Block reflectors are also returned in T
     !! (same output format as SGEQRT).
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: d(*), t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -7_${ik}$
           end if
           ! handle error in the input parameters.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORHR_COL', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              return
           end if
           ! on input, the m-by-n matrix a contains the orthogonal
           ! m-by-n matrix q_in.
           ! (1) compute the unit lower-trapezoidal v (ones on the diagonal
           ! are not stored) by performing the "modified" lu-decomposition.
           ! q_in - ( s ) = v * u = ( v1 ) * u,
                  ! ( 0 )           ( v2 )
           ! where 0 is an (m-n)-by-n zero matrix.
           ! (1-1) factor v1 and u.
           call stdlib${ii}$_slaorhr_col_getrfnp( n, n, a, lda, d, iinfo )
           ! (1-2) solve for v2.
           if( m>n ) then
              call stdlib${ii}$_strsm( 'R', 'U', 'N', 'N', m-n, n, one, a, lda,a( n+1, 1_${ik}$ ), lda )
                        
           end if
           ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n)
           ! as a sequence of upper-triangular blocks with nb-size column
           ! blocking.
           ! loop over the column blocks of size nb of the array a(1:m,1:n)
           ! and the array t(1:nb,1:n), jb is the column index of a column
           ! block, jnb is the column block size at each step jb.
           nplusone = n + 1_${ik}$
           do jb = 1, n, nb
              ! (2-0) determine the column block size jnb.
              jnb = min( nplusone-jb, nb )
              ! (2-1) copy the upper-triangular part of the current jnb-by-jnb
              ! diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part
              ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1)
              ! column-by-column, total jnb*(jnb+1)/2 elements.
              jbtemp1 = jb - 1_${ik}$
              do j = jb, jb+jnb-1
                 call stdlib${ii}$_scopy( j-jbtemp1, a( jb, j ), 1_${ik}$, t( 1_${ik}$, j ), 1_${ik}$ )
              end do
              ! (2-2) perform on the upper-triangular part of the current
              ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in t(1:jnb,jb:jb+jnb-1) the following operation in place:
              ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper-
              ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication
              ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb
              ! diagonal block s(jb) of the n-by-n sign matrix s from the
              ! right means changing the sign of each j-th column of the block
              ! u(jb) according to the sign of the diagonal element of the block
              ! s(jb), i.e. s(j,j) that is stored in the array element d(j).
              do j = jb, jb+jnb-1
                 if( d( j )==one ) then
                    call stdlib${ii}$_sscal( j-jbtemp1, -one, t( 1_${ik}$, j ), 1_${ik}$ )
                 end if
              end do
              ! (2-3) perform the triangular solve for the current block
              ! matrix x(jb):
                     ! x(jb) * (a(jb)**t) = b(jb), where:
                     ! a(jb)**t  is a jnb-by-jnb unit upper-triangular
                               ! coefficient block, and a(jb)=v1(jb), which
                               ! is a jnb-by-jnb unit lower-triangular block
                               ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1).
                               ! the n-by-n matrix v1 is the upper part
                               ! of the m-by-n lower-trapezoidal matrix v
                               ! stored in a(1:m,1:n);
                     ! b(jb)     is a jnb-by-jnb  upper-triangular right-hand
                               ! side block, b(jb) = (-1)*u(jb)*s(jb), and
                               ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1);
                     ! x(jb)     is a jnb-by-jnb upper-triangular solution
                               ! block, x(jb) is the upper-triangular block
                               ! reflector t(jb), and x(jb) is stored
                               ! in t(1:jnb,jb:jb+jnb-1).
                   ! in other words, we perform the triangular solve for the
                   ! upper-triangular block t(jb):
                     ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb).
                   ! even though the blocks x(jb) and b(jb) are upper-
                   ! triangular, the routine stdlib${ii}$_strsm will access all jnb**2
                   ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore,
                   ! we need to set to zero the elements of the block
                   ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call
                   ! to stdlib${ii}$_strsm.
              ! (2-3a) set the elements to zero.
              jbtemp2 = jb - 2_${ik}$
              do j = jb, jb+jnb-2
                 do i = j-jbtemp2, nb
                    t( i, j ) = zero
                 end do
              end do
              ! (2-3b) perform the triangular solve.
              call stdlib${ii}$_strsm( 'R', 'L', 'T', 'U', jnb, jnb, one,a( jb, jb ), lda, t( 1_${ik}$, jb ), &
                        ldt )
           end do
           return
     end subroutine stdlib${ii}$_sorhr_col

     pure module subroutine stdlib${ii}$_dorhr_col( m, n, nb, a, lda, t, ldt, d, info )
     !! DORHR_COL takes an M-by-N real matrix Q_in with orthonormal columns
     !! as input, stored in A, and performs Householder Reconstruction (HR),
     !! i.e. reconstructs Householder vectors V(i) implicitly representing
     !! another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
     !! where S is an N-by-N diagonal matrix with diagonal entries
     !! equal to +1 or -1. The Householder vectors (columns V(i) of V) are
     !! stored in A on output, and the diagonal entries of S are stored in D.
     !! Block reflectors are also returned in T
     !! (same output format as DGEQRT).
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: d(*), t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -7_${ik}$
           end if
           ! handle error in the input parameters.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORHR_COL', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              return
           end if
           ! on input, the m-by-n matrix a contains the orthogonal
           ! m-by-n matrix q_in.
           ! (1) compute the unit lower-trapezoidal v (ones on the diagonal
           ! are not stored) by performing the "modified" lu-decomposition.
           ! q_in - ( s ) = v * u = ( v1 ) * u,
                  ! ( 0 )           ( v2 )
           ! where 0 is an (m-n)-by-n zero matrix.
           ! (1-1) factor v1 and u.
           call stdlib${ii}$_dlaorhr_col_getrfnp( n, n, a, lda, d, iinfo )
           ! (1-2) solve for v2.
           if( m>n ) then
              call stdlib${ii}$_dtrsm( 'R', 'U', 'N', 'N', m-n, n, one, a, lda,a( n+1, 1_${ik}$ ), lda )
                        
           end if
           ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n)
           ! as a sequence of upper-triangular blocks with nb-size column
           ! blocking.
           ! loop over the column blocks of size nb of the array a(1:m,1:n)
           ! and the array t(1:nb,1:n), jb is the column index of a column
           ! block, jnb is the column block size at each step jb.
           nplusone = n + 1_${ik}$
           do jb = 1, n, nb
              ! (2-0) determine the column block size jnb.
              jnb = min( nplusone-jb, nb )
              ! (2-1) copy the upper-triangular part of the current jnb-by-jnb
              ! diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part
              ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1)
              ! column-by-column, total jnb*(jnb+1)/2 elements.
              jbtemp1 = jb - 1_${ik}$
              do j = jb, jb+jnb-1
                 call stdlib${ii}$_dcopy( j-jbtemp1, a( jb, j ), 1_${ik}$, t( 1_${ik}$, j ), 1_${ik}$ )
              end do
              ! (2-2) perform on the upper-triangular part of the current
              ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in t(1:jnb,jb:jb+jnb-1) the following operation in place:
              ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper-
              ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication
              ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb
              ! diagonal block s(jb) of the n-by-n sign matrix s from the
              ! right means changing the sign of each j-th column of the block
              ! u(jb) according to the sign of the diagonal element of the block
              ! s(jb), i.e. s(j,j) that is stored in the array element d(j).
              do j = jb, jb+jnb-1
                 if( d( j )==one ) then
                    call stdlib${ii}$_dscal( j-jbtemp1, -one, t( 1_${ik}$, j ), 1_${ik}$ )
                 end if
              end do
              ! (2-3) perform the triangular solve for the current block
              ! matrix x(jb):
                     ! x(jb) * (a(jb)**t) = b(jb), where:
                     ! a(jb)**t  is a jnb-by-jnb unit upper-triangular
                               ! coefficient block, and a(jb)=v1(jb), which
                               ! is a jnb-by-jnb unit lower-triangular block
                               ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1).
                               ! the n-by-n matrix v1 is the upper part
                               ! of the m-by-n lower-trapezoidal matrix v
                               ! stored in a(1:m,1:n);
                     ! b(jb)     is a jnb-by-jnb  upper-triangular right-hand
                               ! side block, b(jb) = (-1)*u(jb)*s(jb), and
                               ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1);
                     ! x(jb)     is a jnb-by-jnb upper-triangular solution
                               ! block, x(jb) is the upper-triangular block
                               ! reflector t(jb), and x(jb) is stored
                               ! in t(1:jnb,jb:jb+jnb-1).
                   ! in other words, we perform the triangular solve for the
                   ! upper-triangular block t(jb):
                     ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb).
                   ! even though the blocks x(jb) and b(jb) are upper-
                   ! triangular, the routine stdlib${ii}$_dtrsm will access all jnb**2
                   ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore,
                   ! we need to set to zero the elements of the block
                   ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call
                   ! to stdlib${ii}$_dtrsm.
              ! (2-3a) set the elements to zero.
              jbtemp2 = jb - 2_${ik}$
              do j = jb, jb+jnb-2
                 do i = j-jbtemp2, nb
                    t( i, j ) = zero
                 end do
              end do
              ! (2-3b) perform the triangular solve.
              call stdlib${ii}$_dtrsm( 'R', 'L', 'T', 'U', jnb, jnb, one,a( jb, jb ), lda, t( 1_${ik}$, jb ), &
                        ldt )
           end do
           return
     end subroutine stdlib${ii}$_dorhr_col

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orhr_col( m, n, nb, a, lda, t, ldt, d, info )
     !! DORHR_COL: takes an M-by-N real matrix Q_in with orthonormal columns
     !! as input, stored in A, and performs Householder Reconstruction (HR),
     !! i.e. reconstructs Householder vectors V(i) implicitly representing
     !! another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
     !! where S is an N-by-N diagonal matrix with diagonal entries
     !! equal to +1 or -1. The Householder vectors (columns V(i) of V) are
     !! stored in A on output, and the diagonal entries of S are stored in D.
     !! Block reflectors are also returned in T
     !! (same output format as DGEQRT).
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: d(*), t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iinfo, j, jb, jbtemp1, jbtemp2, jnb, nplusone
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -7_${ik}$
           end if
           ! handle error in the input parameters.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORHR_COL', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              return
           end if
           ! on input, the m-by-n matrix a contains the orthogonal
           ! m-by-n matrix q_in.
           ! (1) compute the unit lower-trapezoidal v (ones on the diagonal
           ! are not stored) by performing the "modified" lu-decomposition.
           ! q_in - ( s ) = v * u = ( v1 ) * u,
                  ! ( 0 )           ( v2 )
           ! where 0 is an (m-n)-by-n zero matrix.
           ! (1-1) factor v1 and u.
           call stdlib${ii}$_${ri}$laorhr_col_getrfnp( n, n, a, lda, d, iinfo )
           ! (1-2) solve for v2.
           if( m>n ) then
              call stdlib${ii}$_${ri}$trsm( 'R', 'U', 'N', 'N', m-n, n, one, a, lda,a( n+1, 1_${ik}$ ), lda )
                        
           end if
           ! (2) reconstruct the block reflector t stored in t(1:nb, 1:n)
           ! as a sequence of upper-triangular blocks with nb-size column
           ! blocking.
           ! loop over the column blocks of size nb of the array a(1:m,1:n)
           ! and the array t(1:nb,1:n), jb is the column index of a column
           ! block, jnb is the column block size at each step jb.
           nplusone = n + 1_${ik}$
           do jb = 1, n, nb
              ! (2-0) determine the column block size jnb.
              jnb = min( nplusone-jb, nb )
              ! (2-1) copy the upper-triangular part of the current jnb-by-jnb
              ! diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in a(jb:jb+jnb-1,jb:jb+jnb-1) into the upper-triangular part
              ! of the current jnb-by-jnb block t(1:jnb,jb:jb+jnb-1)
              ! column-by-column, total jnb*(jnb+1)/2 elements.
              jbtemp1 = jb - 1_${ik}$
              do j = jb, jb+jnb-1
                 call stdlib${ii}$_${ri}$copy( j-jbtemp1, a( jb, j ), 1_${ik}$, t( 1_${ik}$, j ), 1_${ik}$ )
              end do
              ! (2-2) perform on the upper-triangular part of the current
              ! jnb-by-jnb diagonal block u(jb) (of the n-by-n matrix u) stored
              ! in t(1:jnb,jb:jb+jnb-1) the following operation in place:
              ! (-1)*u(jb)*s(jb), i.e the result will be stored in the upper-
              ! triangular part of t(1:jnb,jb:jb+jnb-1). this multiplication
              ! of the jnb-by-jnb diagonal block u(jb) by the jnb-by-jnb
              ! diagonal block s(jb) of the n-by-n sign matrix s from the
              ! right means changing the sign of each j-th column of the block
              ! u(jb) according to the sign of the diagonal element of the block
              ! s(jb), i.e. s(j,j) that is stored in the array element d(j).
              do j = jb, jb+jnb-1
                 if( d( j )==one ) then
                    call stdlib${ii}$_${ri}$scal( j-jbtemp1, -one, t( 1_${ik}$, j ), 1_${ik}$ )
                 end if
              end do
              ! (2-3) perform the triangular solve for the current block
              ! matrix x(jb):
                     ! x(jb) * (a(jb)**t) = b(jb), where:
                     ! a(jb)**t  is a jnb-by-jnb unit upper-triangular
                               ! coefficient block, and a(jb)=v1(jb), which
                               ! is a jnb-by-jnb unit lower-triangular block
                               ! stored in a(jb:jb+jnb-1,jb:jb+jnb-1).
                               ! the n-by-n matrix v1 is the upper part
                               ! of the m-by-n lower-trapezoidal matrix v
                               ! stored in a(1:m,1:n);
                     ! b(jb)     is a jnb-by-jnb  upper-triangular right-hand
                               ! side block, b(jb) = (-1)*u(jb)*s(jb), and
                               ! b(jb) is stored in t(1:jnb,jb:jb+jnb-1);
                     ! x(jb)     is a jnb-by-jnb upper-triangular solution
                               ! block, x(jb) is the upper-triangular block
                               ! reflector t(jb), and x(jb) is stored
                               ! in t(1:jnb,jb:jb+jnb-1).
                   ! in other words, we perform the triangular solve for the
                   ! upper-triangular block t(jb):
                     ! t(jb) * (v1(jb)**t) = (-1)*u(jb)*s(jb).
                   ! even though the blocks x(jb) and b(jb) are upper-
                   ! triangular, the routine stdlib${ii}$_${ri}$trsm will access all jnb**2
                   ! elements of the square t(1:jnb,jb:jb+jnb-1). therefore,
                   ! we need to set to zero the elements of the block
                   ! t(1:jnb,jb:jb+jnb-1) below the diagonal before the call
                   ! to stdlib${ii}$_${ri}$trsm.
              ! (2-3a) set the elements to zero.
              jbtemp2 = jb - 2_${ik}$
              do j = jb, jb+jnb-2
                 do i = j-jbtemp2, nb
                    t( i, j ) = zero
                 end do
              end do
              ! (2-3b) perform the triangular solve.
              call stdlib${ii}$_${ri}$trsm( 'R', 'L', 'T', 'U', jnb, jnb, one,a( jb, jb ), lda, t( 1_${ik}$, jb ), &
                        ldt )
           end do
           return
     end subroutine stdlib${ii}$_${ri}$orhr_col

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_claunhr_col_getrfnp( m, n, a, lda, d, info )
     !! CLAUNHR_COL_GETRFNP computes the modified LU factorization without
     !! pivoting of a complex general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is
     !! at least one in absolute value (so that division-by-zero not
     !! not possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the blocked right-looking version of the algorithm,
     !! calling Level 3 BLAS to update the submatrix. To factorize a block,
     !! this routine calls the recursive routine CLAUNHR_COL_GETRFNP2.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: iinfo, j, jb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CLAUNHR_COL_GETRFNP', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CLAUNHR_COL_GETRFNP', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=min( m, n ) ) then
              ! use unblocked code.
              call stdlib${ii}$_claunhr_col_getrfnp2( m, n, a, lda, d, info )
           else
              ! use blocked code.
              do j = 1, min( m, n ), nb
                 jb = min( min( m, n )-j+1, nb )
                 ! factor diagonal and subdiagonal blocks.
                 call stdlib${ii}$_claunhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo )
                           
                 if( j+jb<=n ) then
                    ! compute block row of u.
                    call stdlib${ii}$_ctrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,&
                               a( j, j ), lda, a( j, j+jb ),lda )
                    if( j+jb<=m ) then
                       ! update trailing submatrix.
                       call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -&
                       cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda )
                                 
                    end if
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_claunhr_col_getrfnp

     pure module subroutine stdlib${ii}$_zlaunhr_col_getrfnp( m, n, a, lda, d, info )
     !! ZLAUNHR_COL_GETRFNP computes the modified LU factorization without
     !! pivoting of a complex general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is
     !! at least one in absolute value (so that division-by-zero not
     !! not possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the blocked right-looking version of the algorithm,
     !! calling Level 3 BLAS to update the submatrix. To factorize a block,
     !! this routine calls the recursive routine ZLAUNHR_COL_GETRFNP2.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: iinfo, j, jb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAUNHR_COL_GETRFNP', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZLAUNHR_COL_GETRFNP', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=min( m, n ) ) then
              ! use unblocked code.
              call stdlib${ii}$_zlaunhr_col_getrfnp2( m, n, a, lda, d, info )
           else
              ! use blocked code.
              do j = 1, min( m, n ), nb
                 jb = min( min( m, n )-j+1, nb )
                 ! factor diagonal and subdiagonal blocks.
                 call stdlib${ii}$_zlaunhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo )
                           
                 if( j+jb<=n ) then
                    ! compute block row of u.
                    call stdlib${ii}$_ztrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,&
                               a( j, j ), lda, a( j, j+jb ),lda )
                    if( j+jb<=m ) then
                       ! update trailing submatrix.
                       call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -&
                       cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda )
                                 
                    end if
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_zlaunhr_col_getrfnp

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$launhr_col_getrfnp( m, n, a, lda, d, info )
     !! ZLAUNHR_COL_GETRFNP: computes the modified LU factorization without
     !! pivoting of a complex general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is
     !! at least one in absolute value (so that division-by-zero not
     !! not possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the blocked right-looking version of the algorithm,
     !! calling Level 3 BLAS to update the submatrix. To factorize a block,
     !! this routine calls the recursive routine ZLAUNHR_COL_GETRFNP2.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: iinfo, j, jb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAUNHR_COL_GETRFNP', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZLAUNHR_COL_GETRFNP', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=min( m, n ) ) then
              ! use unblocked code.
              call stdlib${ii}$_${ci}$launhr_col_getrfnp2( m, n, a, lda, d, info )
           else
              ! use blocked code.
              do j = 1, min( m, n ), nb
                 jb = min( min( m, n )-j+1, nb )
                 ! factor diagonal and subdiagonal blocks.
                 call stdlib${ii}$_${ci}$launhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo )
                           
                 if( j+jb<=n ) then
                    ! compute block row of u.
                    call stdlib${ii}$_${ci}$trsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, cone,&
                               a( j, j ), lda, a( j, j+jb ),lda )
                    if( j+jb<=m ) then
                       ! update trailing submatrix.
                       call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -&
                       cone, a( j+jb, j ), lda,a( j, j+jb ), lda, cone, a( j+jb, j+jb ),lda )
                                 
                    end if
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ci}$launhr_col_getrfnp

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaorhr_col_getrfnp( m, n, a, lda, d, info )
     !! SLAORHR_COL_GETRFNP computes the modified LU factorization without
     !! pivoting of a real general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is
     !! at least one in absolute value (so that division-by-zero not
     !! not possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine SORHR_COL. In SORHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the blocked right-looking version of the algorithm,
     !! calling Level 3 BLAS to update the submatrix. To factorize a block,
     !! this routine calls the recursive routine SLAORHR_COL_GETRFNP2.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: iinfo, j, jb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAORHR_COL_GETRFNP', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SLAORHR_COL_GETRFNP', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=min( m, n ) ) then
              ! use unblocked code.
              call stdlib${ii}$_slaorhr_col_getrfnp2( m, n, a, lda, d, info )
           else
              ! use blocked code.
              do j = 1, min( m, n ), nb
                 jb = min( min( m, n )-j+1, nb )
                 ! factor diagonal and subdiagonal blocks.
                 call stdlib${ii}$_slaorhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo )
                           
                 if( j+jb<=n ) then
                    ! compute block row of u.
                    call stdlib${ii}$_strsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, &
                              a( j, j ), lda, a( j, j+jb ),lda )
                    if( j+jb<=m ) then
                       ! update trailing submatrix.
                       call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -&
                       one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda )
                                 
                    end if
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_slaorhr_col_getrfnp

     pure module subroutine stdlib${ii}$_dlaorhr_col_getrfnp( m, n, a, lda, d, info )
     !! DLAORHR_COL_GETRFNP computes the modified LU factorization without
     !! pivoting of a real general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is
     !! at least one in absolute value (so that division-by-zero not
     !! not possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine DORHR_COL. In DORHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the blocked right-looking version of the algorithm,
     !! calling Level 3 BLAS to update the submatrix. To factorize a block,
     !! this routine calls the recursive routine DLAORHR_COL_GETRFNP2.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: iinfo, j, jb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAORHR_COL_GETRFNP', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DLAORHR_COL_GETRFNP', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=min( m, n ) ) then
              ! use unblocked code.
              call stdlib${ii}$_dlaorhr_col_getrfnp2( m, n, a, lda, d, info )
           else
              ! use blocked code.
              do j = 1, min( m, n ), nb
                 jb = min( min( m, n )-j+1, nb )
                 ! factor diagonal and subdiagonal blocks.
                 call stdlib${ii}$_dlaorhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo )
                           
                 if( j+jb<=n ) then
                    ! compute block row of u.
                    call stdlib${ii}$_dtrsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, &
                              a( j, j ), lda, a( j, j+jb ),lda )
                    if( j+jb<=m ) then
                       ! update trailing submatrix.
                       call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -&
                       one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda )
                                 
                    end if
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_dlaorhr_col_getrfnp

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laorhr_col_getrfnp( m, n, a, lda, d, info )
     !! DLAORHR_COL_GETRFNP: computes the modified LU factorization without
     !! pivoting of a real general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is
     !! at least one in absolute value (so that division-by-zero not
     !! not possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine DORHR_COL. In DORHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the blocked right-looking version of the algorithm,
     !! calling Level 3 BLAS to update the submatrix. To factorize a block,
     !! this routine calls the recursive routine DLAORHR_COL_GETRFNP2.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: iinfo, j, jb, nb
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAORHR_COL_GETRFNP', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           ! determine the block size for this environment.
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DLAORHR_COL_GETRFNP', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           if( nb<=1_${ik}$ .or. nb>=min( m, n ) ) then
              ! use unblocked code.
              call stdlib${ii}$_${ri}$laorhr_col_getrfnp2( m, n, a, lda, d, info )
           else
              ! use blocked code.
              do j = 1, min( m, n ), nb
                 jb = min( min( m, n )-j+1, nb )
                 ! factor diagonal and subdiagonal blocks.
                 call stdlib${ii}$_${ri}$laorhr_col_getrfnp2( m-j+1, jb, a( j, j ), lda,d( j ), iinfo )
                           
                 if( j+jb<=n ) then
                    ! compute block row of u.
                    call stdlib${ii}$_${ri}$trsm( 'LEFT', 'LOWER', 'NO TRANSPOSE', 'UNIT', jb,n-j-jb+1, one, &
                              a( j, j ), lda, a( j, j+jb ),lda )
                    if( j+jb<=m ) then
                       ! update trailing submatrix.
                       call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-j-jb+1,n-j-jb+1, jb, -&
                       one, a( j+jb, j ), lda,a( j, j+jb ), lda, one, a( j+jb, j+jb ),lda )
                                 
                    end if
                 end if
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$laorhr_col_getrfnp

#:endif
#:endfor



     pure recursive module subroutine stdlib${ii}$_claunhr_col_getrfnp2( m, n, a, lda, d, info )
     !! CLAUNHR_COL_GETRFNP2 computes the modified LU factorization without
     !! pivoting of a complex general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is at
     !! least one in absolute value (so that division-by-zero not
     !! possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the recursive version of the LU factorization algorithm.
     !! Denote A - S by B. The algorithm divides the matrix B into four
     !! submatrices:
     !! [  B11 | B12  ]  where B11 is n1 by n1,
     !! B = [ -----|----- ]        B21 is (m-n1) by n1,
     !! [  B21 | B22  ]        B12 is n1 by n2,
     !! B22 is (m-n1) by n2,
     !! with n1 = min(m,n)/2, n2 = n-n1.
     !! The subroutine calls itself to factor B11, solves for B21,
     !! solves for B12, updates B22, then calls itself to factor B22.
     !! For more details on the recursive LU algorithm, see [2].
     !! CLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked
     !! routine CLAUNHR_COL_GETRFNP, which uses blocked code calling
     !! Level 3 BLAS to update the submatrix. However, CLAUNHR_COL_GETRFNP2
     !! is self-sufficient and can be used without CLAUNHR_COL_GETRFNP.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
     !! [2] "Recursion leads to automatic variable blocking for dense linear
     !! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     !! vol. 41, no. 6, pp. 737-755, 1997.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: d(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           real(sp) :: sfmin
           integer(${ik}$) :: i, iinfo, n1, n2
           complex(sp) :: z
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( z ) = abs( real( z,KIND=sp) ) + abs( aimag( z ) )
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CLAUNHR_COL_GETRFNP2', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           if ( m==1_${ik}$ ) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = cmplx( -sign( one, real( a( 1_${ik}$, 1_${ik}$ ),KIND=sp) ),KIND=sp)
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
           else if( n==1_${ik}$ ) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = cmplx( -sign( one, real( a( 1_${ik}$, 1_${ik}$ ),KIND=sp) ),KIND=sp)
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib${ii}$_slamch('S')
              ! construct the subdiagonal elements of l
              if( cabs1( a( 1_${ik}$, 1_${ik}$ ) ) >= sfmin ) then
                 call stdlib${ii}$_cscal( m-1, cone / a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 2, m
                    a( i, 1_${ik}$ ) = a( i, 1_${ik}$ ) / a( 1_${ik}$, 1_${ik}$ )
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min( m, n ) / 2_${ik}$
              n2 = n-n1
              ! factor b11, recursive call
              call stdlib${ii}$_claunhr_col_getrfnp2( n1, n1, a, lda, d, iinfo )
              ! solve for b21
              call stdlib${ii}$_ctrsm( 'R', 'U', 'N', 'N', m-n1, n1, cone, a, lda,a( n1+1, 1_${ik}$ ), lda )
                        
              ! solve for b12
              call stdlib${ii}$_ctrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1_${ik}$, n1+1 ), lda )
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib${ii}$_cgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1_${ik}$ ), lda,a( 1_${ik}$, n1+1 ), &
                        lda, cone, a( n1+1, n1+1 ), lda )
              ! factor b22, recursive call
              call stdlib${ii}$_claunhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo )
                        
           end if
           return
     end subroutine stdlib${ii}$_claunhr_col_getrfnp2

     pure recursive module subroutine stdlib${ii}$_zlaunhr_col_getrfnp2( m, n, a, lda, d, info )
     !! ZLAUNHR_COL_GETRFNP2 computes the modified LU factorization without
     !! pivoting of a complex general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is at
     !! least one in absolute value (so that division-by-zero not
     !! possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the recursive version of the LU factorization algorithm.
     !! Denote A - S by B. The algorithm divides the matrix B into four
     !! submatrices:
     !! [  B11 | B12  ]  where B11 is n1 by n1,
     !! B = [ -----|----- ]        B21 is (m-n1) by n1,
     !! [  B21 | B22  ]        B12 is n1 by n2,
     !! B22 is (m-n1) by n2,
     !! with n1 = min(m,n)/2, n2 = n-n1.
     !! The subroutine calls itself to factor B11, solves for B21,
     !! solves for B12, updates B22, then calls itself to factor B22.
     !! For more details on the recursive LU algorithm, see [2].
     !! ZLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked
     !! routine ZLAUNHR_COL_GETRFNP, which uses blocked code calling
     !! Level 3 BLAS to update the submatrix. However, ZLAUNHR_COL_GETRFNP2
     !! is self-sufficient and can be used without ZLAUNHR_COL_GETRFNP.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
     !! [2] "Recursion leads to automatic variable blocking for dense linear
     !! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     !! vol. 41, no. 6, pp. 737-755, 1997.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: d(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           real(dp) :: sfmin
           integer(${ik}$) :: i, iinfo, n1, n2
           complex(dp) :: z
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( z ) = abs( real( z,KIND=dp) ) + abs( aimag( z ) )
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAUNHR_COL_GETRFNP2', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           if ( m==1_${ik}$ ) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = cmplx( -sign( one, real( a( 1_${ik}$, 1_${ik}$ ),KIND=dp) ),KIND=dp)
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
           else if( n==1_${ik}$ ) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = cmplx( -sign( one, real( a( 1_${ik}$, 1_${ik}$ ),KIND=dp) ),KIND=dp)
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib${ii}$_dlamch('S')
              ! construct the subdiagonal elements of l
              if( cabs1( a( 1_${ik}$, 1_${ik}$ ) ) >= sfmin ) then
                 call stdlib${ii}$_zscal( m-1, cone / a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 2, m
                    a( i, 1_${ik}$ ) = a( i, 1_${ik}$ ) / a( 1_${ik}$, 1_${ik}$ )
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min( m, n ) / 2_${ik}$
              n2 = n-n1
              ! factor b11, recursive call
              call stdlib${ii}$_zlaunhr_col_getrfnp2( n1, n1, a, lda, d, iinfo )
              ! solve for b21
              call stdlib${ii}$_ztrsm( 'R', 'U', 'N', 'N', m-n1, n1, cone, a, lda,a( n1+1, 1_${ik}$ ), lda )
                        
              ! solve for b12
              call stdlib${ii}$_ztrsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1_${ik}$, n1+1 ), lda )
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib${ii}$_zgemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1_${ik}$ ), lda,a( 1_${ik}$, n1+1 ), &
                        lda, cone, a( n1+1, n1+1 ), lda )
              ! factor b22, recursive call
              call stdlib${ii}$_zlaunhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo )
                        
           end if
           return
     end subroutine stdlib${ii}$_zlaunhr_col_getrfnp2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure recursive module subroutine stdlib${ii}$_${ci}$launhr_col_getrfnp2( m, n, a, lda, d, info )
     !! ZLAUNHR_COL_GETRFNP2: computes the modified LU factorization without
     !! pivoting of a complex general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is at
     !! least one in absolute value (so that division-by-zero not
     !! possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine ZUNHR_COL. In ZUNHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the recursive version of the LU factorization algorithm.
     !! Denote A - S by B. The algorithm divides the matrix B into four
     !! submatrices:
     !! [  B11 | B12  ]  where B11 is n1 by n1,
     !! B = [ -----|----- ]        B21 is (m-n1) by n1,
     !! [  B21 | B22  ]        B12 is n1 by n2,
     !! B22 is (m-n1) by n2,
     !! with n1 = min(m,n)/2, n2 = n-n1.
     !! The subroutine calls itself to factor B11, solves for B21,
     !! solves for B12, updates B22, then calls itself to factor B22.
     !! For more details on the recursive LU algorithm, see [2].
     !! ZLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked
     !! routine ZLAUNHR_COL_GETRFNP, which uses blocked code calling
     !! Level 3 BLAS to update the submatrix. However, ZLAUNHR_COL_GETRFNP2
     !! is self-sufficient and can be used without ZLAUNHR_COL_GETRFNP.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
     !! [2] "Recursion leads to automatic variable blocking for dense linear
     !! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     !! vol. 41, no. 6, pp. 737-755, 1997.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: d(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           real(${ck}$) :: sfmin
           integer(${ik}$) :: i, iinfo, n1, n2
           complex(${ck}$) :: z
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( z ) = abs( real( z,KIND=${ck}$) ) + abs( aimag( z ) )
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAUNHR_COL_GETRFNP2', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           if ( m==1_${ik}$ ) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = cmplx( -sign( one, real( a( 1_${ik}$, 1_${ik}$ ),KIND=${ck}$) ),KIND=${ck}$)
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
           else if( n==1_${ik}$ ) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = cmplx( -sign( one, real( a( 1_${ik}$, 1_${ik}$ ),KIND=${ck}$) ),KIND=${ck}$)
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib${ii}$_${c2ri(ci)}$lamch('S')
              ! construct the subdiagonal elements of l
              if( cabs1( a( 1_${ik}$, 1_${ik}$ ) ) >= sfmin ) then
                 call stdlib${ii}$_${ci}$scal( m-1, cone / a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 2, m
                    a( i, 1_${ik}$ ) = a( i, 1_${ik}$ ) / a( 1_${ik}$, 1_${ik}$ )
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min( m, n ) / 2_${ik}$
              n2 = n-n1
              ! factor b11, recursive call
              call stdlib${ii}$_${ci}$launhr_col_getrfnp2( n1, n1, a, lda, d, iinfo )
              ! solve for b21
              call stdlib${ii}$_${ci}$trsm( 'R', 'U', 'N', 'N', m-n1, n1, cone, a, lda,a( n1+1, 1_${ik}$ ), lda )
                        
              ! solve for b12
              call stdlib${ii}$_${ci}$trsm( 'L', 'L', 'N', 'U', n1, n2, cone, a, lda,a( 1_${ik}$, n1+1 ), lda )
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m-n1, n2, n1, -cone, a( n1+1, 1_${ik}$ ), lda,a( 1_${ik}$, n1+1 ), &
                        lda, cone, a( n1+1, n1+1 ), lda )
              ! factor b22, recursive call
              call stdlib${ii}$_${ci}$launhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo )
                        
           end if
           return
     end subroutine stdlib${ii}$_${ci}$launhr_col_getrfnp2

#:endif
#:endfor



     pure recursive module subroutine stdlib${ii}$_slaorhr_col_getrfnp2( m, n, a, lda, d, info )
     !! SLAORHR_COL_GETRFNP2 computes the modified LU factorization without
     !! pivoting of a real general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is at
     !! least one in absolute value (so that division-by-zero not
     !! possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine SORHR_COL. In SORHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the recursive version of the LU factorization algorithm.
     !! Denote A - S by B. The algorithm divides the matrix B into four
     !! submatrices:
     !! [  B11 | B12  ]  where B11 is n1 by n1,
     !! B = [ -----|----- ]        B21 is (m-n1) by n1,
     !! [  B21 | B22  ]        B12 is n1 by n2,
     !! B22 is (m-n1) by n2,
     !! with n1 = min(m,n)/2, n2 = n-n1.
     !! The subroutine calls itself to factor B11, solves for B21,
     !! solves for B12, updates B22, then calls itself to factor B22.
     !! For more details on the recursive LU algorithm, see [2].
     !! SLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
     !! routine SLAORHR_COL_GETRFNP, which uses blocked code calling
     !! Level 3 BLAS to update the submatrix. However, SLAORHR_COL_GETRFNP2
     !! is self-sufficient and can be used without SLAORHR_COL_GETRFNP.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
     !! [2] "Recursion leads to automatic variable blocking for dense linear
     !! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     !! vol. 41, no. 6, pp. 737-755, 1997.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           real(sp) :: sfmin
           integer(${ik}$) :: i, iinfo, n1, n2
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAORHR_COL_GETRFNP2', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           if ( m==1_${ik}$ ) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = -sign( one, a( 1_${ik}$, 1_${ik}$ ) )
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
           else if( n==1_${ik}$ ) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = -sign( one, a( 1_${ik}$, 1_${ik}$ ) )
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib${ii}$_slamch('S')
              ! construct the subdiagonal elements of l
              if( abs( a( 1_${ik}$, 1_${ik}$ ) ) >= sfmin ) then
                 call stdlib${ii}$_sscal( m-1, one / a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 2, m
                    a( i, 1_${ik}$ ) = a( i, 1_${ik}$ ) / a( 1_${ik}$, 1_${ik}$ )
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min( m, n ) / 2_${ik}$
              n2 = n-n1
              ! factor b11, recursive call
              call stdlib${ii}$_slaorhr_col_getrfnp2( n1, n1, a, lda, d, iinfo )
              ! solve for b21
              call stdlib${ii}$_strsm( 'R', 'U', 'N', 'N', m-n1, n1, one, a, lda,a( n1+1, 1_${ik}$ ), lda )
                        
              ! solve for b12
              call stdlib${ii}$_strsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1_${ik}$, n1+1 ), lda )
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib${ii}$_sgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1_${ik}$ ), lda,a( 1_${ik}$, n1+1 ), &
                        lda, one, a( n1+1, n1+1 ), lda )
              ! factor b22, recursive call
              call stdlib${ii}$_slaorhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo )
                        
           end if
           return
     end subroutine stdlib${ii}$_slaorhr_col_getrfnp2

     pure recursive module subroutine stdlib${ii}$_dlaorhr_col_getrfnp2( m, n, a, lda, d, info )
     !! DLAORHR_COL_GETRFNP2 computes the modified LU factorization without
     !! pivoting of a real general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is at
     !! least one in absolute value (so that division-by-zero not
     !! possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine DORHR_COL. In DORHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the recursive version of the LU factorization algorithm.
     !! Denote A - S by B. The algorithm divides the matrix B into four
     !! submatrices:
     !! [  B11 | B12  ]  where B11 is n1 by n1,
     !! B = [ -----|----- ]        B21 is (m-n1) by n1,
     !! [  B21 | B22  ]        B12 is n1 by n2,
     !! B22 is (m-n1) by n2,
     !! with n1 = min(m,n)/2, n2 = n-n1.
     !! The subroutine calls itself to factor B11, solves for B21,
     !! solves for B12, updates B22, then calls itself to factor B22.
     !! For more details on the recursive LU algorithm, see [2].
     !! DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
     !! routine DLAORHR_COL_GETRFNP, which uses blocked code calling
     !! Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2
     !! is self-sufficient and can be used without DLAORHR_COL_GETRFNP.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
     !! [2] "Recursion leads to automatic variable blocking for dense linear
     !! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     !! vol. 41, no. 6, pp. 737-755, 1997.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           real(dp) :: sfmin
           integer(${ik}$) :: i, iinfo, n1, n2
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAORHR_COL_GETRFNP2', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           if ( m==1_${ik}$ ) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = -sign( one, a( 1_${ik}$, 1_${ik}$ ) )
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
           else if( n==1_${ik}$ ) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = -sign( one, a( 1_${ik}$, 1_${ik}$ ) )
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib${ii}$_dlamch('S')
              ! construct the subdiagonal elements of l
              if( abs( a( 1_${ik}$, 1_${ik}$ ) ) >= sfmin ) then
                 call stdlib${ii}$_dscal( m-1, one / a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 2, m
                    a( i, 1_${ik}$ ) = a( i, 1_${ik}$ ) / a( 1_${ik}$, 1_${ik}$ )
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min( m, n ) / 2_${ik}$
              n2 = n-n1
              ! factor b11, recursive call
              call stdlib${ii}$_dlaorhr_col_getrfnp2( n1, n1, a, lda, d, iinfo )
              ! solve for b21
              call stdlib${ii}$_dtrsm( 'R', 'U', 'N', 'N', m-n1, n1, one, a, lda,a( n1+1, 1_${ik}$ ), lda )
                        
              ! solve for b12
              call stdlib${ii}$_dtrsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1_${ik}$, n1+1 ), lda )
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib${ii}$_dgemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1_${ik}$ ), lda,a( 1_${ik}$, n1+1 ), &
                        lda, one, a( n1+1, n1+1 ), lda )
              ! factor b22, recursive call
              call stdlib${ii}$_dlaorhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo )
                        
           end if
           return
     end subroutine stdlib${ii}$_dlaorhr_col_getrfnp2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure recursive module subroutine stdlib${ii}$_${ri}$laorhr_col_getrfnp2( m, n, a, lda, d, info )
     !! DLAORHR_COL_GETRFNP2: computes the modified LU factorization without
     !! pivoting of a real general M-by-N matrix A. The factorization has
     !! the form:
     !! A - S = L * U,
     !! where:
     !! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     !! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     !! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     !! i-1 steps of Gaussian elimination. This means that the diagonal
     !! element at each step of "modified" Gaussian elimination is at
     !! least one in absolute value (so that division-by-zero not
     !! possible during the division by the diagonal element);
     !! L is a M-by-N lower triangular matrix with unit diagonal elements
     !! (lower trapezoidal if M > N);
     !! and U is a M-by-N upper triangular matrix
     !! (upper trapezoidal if M < N).
     !! This routine is an auxiliary routine used in the Householder
     !! reconstruction routine DORHR_COL. In DORHR_COL, this routine is
     !! applied to an M-by-N matrix A with orthonormal columns, where each
     !! element is bounded by one in absolute value. With the choice of
     !! the matrix S above, one can show that the diagonal element at each
     !! step of Gaussian elimination is the largest (in absolute value) in
     !! the column on or below the diagonal, so that no pivoting is required
     !! for numerical stability [1].
     !! For more details on the Householder reconstruction algorithm,
     !! including the modified LU factorization, see [1].
     !! This is the recursive version of the LU factorization algorithm.
     !! Denote A - S by B. The algorithm divides the matrix B into four
     !! submatrices:
     !! [  B11 | B12  ]  where B11 is n1 by n1,
     !! B = [ -----|----- ]        B21 is (m-n1) by n1,
     !! [  B21 | B22  ]        B12 is n1 by n2,
     !! B22 is (m-n1) by n2,
     !! with n1 = min(m,n)/2, n2 = n-n1.
     !! The subroutine calls itself to factor B11, solves for B21,
     !! solves for B12, updates B22, then calls itself to factor B22.
     !! For more details on the recursive LU algorithm, see [2].
     !! DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
     !! routine DLAORHR_COL_GETRFNP, which uses blocked code calling
     !! Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2
     !! is self-sufficient and can be used without DLAORHR_COL_GETRFNP.
     !! [1] "Reconstructing Householder vectors from tall-skinny QR",
     !! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     !! E. Solomonik, J. Parallel Distrib. Comput.,
     !! vol. 85, pp. 3-31, 2015.
     !! [2] "Recursion leads to automatic variable blocking for dense linear
     !! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     !! vol. 41, no. 6, pp. 737-755, 1997.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: d(*)
        ! =====================================================================
           
           ! Local Scalars 
           real(${rk}$) :: sfmin
           integer(${ik}$) :: i, iinfo, n1, n2
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAORHR_COL_GETRFNP2', -info )
              return
           end if
           ! quick return if possible
           if( min( m, n )==0 )return
           if ( m==1_${ik}$ ) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = -sign( one, a( 1_${ik}$, 1_${ik}$ ) )
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
           else if( n==1_${ik}$ ) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d( 1_${ik}$ ) = -sign( one, a( 1_${ik}$, 1_${ik}$ ) )
              ! construct the row of u
              a( 1_${ik}$, 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ ) - d( 1_${ik}$ )
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib${ii}$_${ri}$lamch('S')
              ! construct the subdiagonal elements of l
              if( abs( a( 1_${ik}$, 1_${ik}$ ) ) >= sfmin ) then
                 call stdlib${ii}$_${ri}$scal( m-1, one / a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 2, m
                    a( i, 1_${ik}$ ) = a( i, 1_${ik}$ ) / a( 1_${ik}$, 1_${ik}$ )
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min( m, n ) / 2_${ik}$
              n2 = n-n1
              ! factor b11, recursive call
              call stdlib${ii}$_${ri}$laorhr_col_getrfnp2( n1, n1, a, lda, d, iinfo )
              ! solve for b21
              call stdlib${ii}$_${ri}$trsm( 'R', 'U', 'N', 'N', m-n1, n1, one, a, lda,a( n1+1, 1_${ik}$ ), lda )
                        
              ! solve for b12
              call stdlib${ii}$_${ri}$trsm( 'L', 'L', 'N', 'U', n1, n2, one, a, lda,a( 1_${ik}$, n1+1 ), lda )
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m-n1, n2, n1, -one, a( n1+1, 1_${ik}$ ), lda,a( 1_${ik}$, n1+1 ), &
                        lda, one, a( n1+1, n1+1 ), lda )
              ! factor b22, recursive call
              call stdlib${ii}$_${ri}$laorhr_col_getrfnp2( m-n1, n2, a( n1+1, n1+1 ), lda,d( n1+1 ), iinfo )
                        
           end if
           return
     end subroutine stdlib${ii}$_${ri}$laorhr_col_getrfnp2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info )
     !! STPQRT computes a blocked QR factorization of a real
     !! "triangular-pentagonal" matrix C, which is composed of a
     !! triangular block A and pentagonal block B, using the compact
     !! WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, ib, lb, mb, iinfo
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. (l>min(m,n) .and. min(m,n)>=0_${ik}$)) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ .or. (nb>n .and. n>0_${ik}$)) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STPQRT', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 ) return
           do i = 1, n, nb
           ! compute the qr factorization of the current block
              ib = min( n-i+1, nb )
              mb = min( m-l+i+ib-1, m )
              if( i>=l ) then
                 lb = 0_${ik}$
              else
                 lb = mb-m+l-i+1
              end if
              call stdlib${ii}$_stpqrt2( mb, ib, lb, a(i,i), lda, b( 1_${ik}$, i ), ldb,t(1_${ik}$, i ), ldt, iinfo )
                        
           ! update by applying h^h to b(:,i+ib:n) from the left
              if( i+ib<=n ) then
                 call stdlib${ii}$_stprfb( 'L', 'T', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1_${ik}$, i ), ldb, t( &
                           1_${ik}$, i ), ldt,a( i, i+ib ), lda, b( 1_${ik}$, i+ib ), ldb,work, ib )
              end if
           end do
           return
     end subroutine stdlib${ii}$_stpqrt

     pure module subroutine stdlib${ii}$_dtpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info )
     !! DTPQRT computes a blocked QR factorization of a real
     !! "triangular-pentagonal" matrix C, which is composed of a
     !! triangular block A and pentagonal block B, using the compact
     !! WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, ib, lb, mb, iinfo
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. (l>min(m,n) .and. min(m,n)>=0_${ik}$)) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ .or. (nb>n .and. n>0_${ik}$)) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTPQRT', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 ) return
           do i = 1, n, nb
           ! compute the qr factorization of the current block
              ib = min( n-i+1, nb )
              mb = min( m-l+i+ib-1, m )
              if( i>=l ) then
                 lb = 0_${ik}$
              else
                 lb = mb-m+l-i+1
              end if
              call stdlib${ii}$_dtpqrt2( mb, ib, lb, a(i,i), lda, b( 1_${ik}$, i ), ldb,t(1_${ik}$, i ), ldt, iinfo )
                        
           ! update by applying h**t to b(:,i+ib:n) from the left
              if( i+ib<=n ) then
                 call stdlib${ii}$_dtprfb( 'L', 'T', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1_${ik}$, i ), ldb, t( &
                           1_${ik}$, i ), ldt,a( i, i+ib ), lda, b( 1_${ik}$, i+ib ), ldb,work, ib )
              end if
           end do
           return
     end subroutine stdlib${ii}$_dtpqrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info )
     !! DTPQRT: computes a blocked QR factorization of a real
     !! "triangular-pentagonal" matrix C, which is composed of a
     !! triangular block A and pentagonal block B, using the compact
     !! WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, ib, lb, mb, iinfo
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. (l>min(m,n) .and. min(m,n)>=0_${ik}$)) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ .or. (nb>n .and. n>0_${ik}$)) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTPQRT', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 ) return
           do i = 1, n, nb
           ! compute the qr factorization of the current block
              ib = min( n-i+1, nb )
              mb = min( m-l+i+ib-1, m )
              if( i>=l ) then
                 lb = 0_${ik}$
              else
                 lb = mb-m+l-i+1
              end if
              call stdlib${ii}$_${ri}$tpqrt2( mb, ib, lb, a(i,i), lda, b( 1_${ik}$, i ), ldb,t(1_${ik}$, i ), ldt, iinfo )
                        
           ! update by applying h**t to b(:,i+ib:n) from the left
              if( i+ib<=n ) then
                 call stdlib${ii}$_${ri}$tprfb( 'L', 'T', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1_${ik}$, i ), ldb, t( &
                           1_${ik}$, i ), ldt,a( i, i+ib ), lda, b( 1_${ik}$, i+ib ), ldb,work, ib )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$tpqrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info )
     !! CTPQRT computes a blocked QR factorization of a complex
     !! "triangular-pentagonal" matrix C, which is composed of a
     !! triangular block A and pentagonal block B, using the compact
     !! WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, ib, lb, mb, iinfo
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. (l>min(m,n) .and. min(m,n)>=0_${ik}$)) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ .or. (nb>n .and. n>0_${ik}$)) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTPQRT', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 ) return
           do i = 1, n, nb
           ! compute the qr factorization of the current block
              ib = min( n-i+1, nb )
              mb = min( m-l+i+ib-1, m )
              if( i>=l ) then
                 lb = 0_${ik}$
              else
                 lb = mb-m+l-i+1
              end if
              call stdlib${ii}$_ctpqrt2( mb, ib, lb, a(i,i), lda, b( 1_${ik}$, i ), ldb,t(1_${ik}$, i ), ldt, iinfo )
                        
           ! update by applying h**h to b(:,i+ib:n) from the left
              if( i+ib<=n ) then
                 call stdlib${ii}$_ctprfb( 'L', 'C', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1_${ik}$, i ), ldb, t( &
                           1_${ik}$, i ), ldt,a( i, i+ib ), lda, b( 1_${ik}$, i+ib ), ldb,work, ib )
              end if
           end do
           return
     end subroutine stdlib${ii}$_ctpqrt

     pure module subroutine stdlib${ii}$_ztpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info )
     !! ZTPQRT computes a blocked QR factorization of a complex
     !! "triangular-pentagonal" matrix C, which is composed of a
     !! triangular block A and pentagonal block B, using the compact
     !! WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, ib, lb, mb, iinfo
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. (l>min(m,n) .and. min(m,n)>=0_${ik}$)) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ .or. (nb>n .and. n>0_${ik}$)) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTPQRT', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 ) return
           do i = 1, n, nb
           ! compute the qr factorization of the current block
              ib = min( n-i+1, nb )
              mb = min( m-l+i+ib-1, m )
              if( i>=l ) then
                 lb = 0_${ik}$
              else
                 lb = mb-m+l-i+1
              end if
              call stdlib${ii}$_ztpqrt2( mb, ib, lb, a(i,i), lda, b( 1_${ik}$, i ), ldb,t(1_${ik}$, i ), ldt, iinfo )
                        
           ! update by applying h**h to b(:,i+ib:n) from the left
              if( i+ib<=n ) then
                 call stdlib${ii}$_ztprfb( 'L', 'C', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1_${ik}$, i ), ldb, t( &
                           1_${ik}$, i ), ldt,a( i, i+ib ), lda, b( 1_${ik}$, i+ib ), ldb,work, ib )
              end if
           end do
           return
     end subroutine stdlib${ii}$_ztpqrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tpqrt( m, n, l, nb, a, lda, b, ldb, t, ldt, work,info )
     !! ZTPQRT: computes a blocked QR factorization of a complex
     !! "triangular-pentagonal" matrix C, which is composed of a
     !! triangular block A and pentagonal block B, using the compact
     !! WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, ib, lb, mb, iinfo
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. (l>min(m,n) .and. min(m,n)>=0_${ik}$)) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ .or. (nb>n .and. n>0_${ik}$)) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTPQRT', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 ) return
           do i = 1, n, nb
           ! compute the qr factorization of the current block
              ib = min( n-i+1, nb )
              mb = min( m-l+i+ib-1, m )
              if( i>=l ) then
                 lb = 0_${ik}$
              else
                 lb = mb-m+l-i+1
              end if
              call stdlib${ii}$_${ci}$tpqrt2( mb, ib, lb, a(i,i), lda, b( 1_${ik}$, i ), ldb,t(1_${ik}$, i ), ldt, iinfo )
                        
           ! update by applying h**h to b(:,i+ib:n) from the left
              if( i+ib<=n ) then
                 call stdlib${ii}$_${ci}$tprfb( 'L', 'C', 'F', 'C', mb, n-i-ib+1, ib, lb,b( 1_${ik}$, i ), ldb, t( &
                           1_${ik}$, i ), ldt,a( i, i+ib ), lda, b( 1_${ik}$, i+ib ), ldb,work, ib )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$tpqrt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info )
     !! STPQRT2 computes a QR factorization of a real "triangular-pentagonal"
     !! matrix C, which is composed of a triangular block A and pentagonal block B,
     !! using the compact WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, p, mp, np
           real(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. l>min(m,n) ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STPQRT2', -info )
              return
           end if
           ! quick return if possible
           if( n==0 .or. m==0 ) return
           do i = 1, n
              ! generate elementary reflector h(i) to annihilate b(:,i)
              p = m-l+min( l, i )
              call stdlib${ii}$_slarfg( p+1, a( i, i ), b( 1_${ik}$, i ), 1_${ik}$, t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! w(1:n-i) := c(i:m,i+1:n)^h * c(i:m,i) [use w = t(:,n)]
                 do j = 1, n-i
                    t( j, n ) = (a( i, i+j ))
                 end do
                 call stdlib${ii}$_sgemv( 'T', p, n-i, one, b( 1_${ik}$, i+1 ), ldb,b( 1_${ik}$, i ), 1_${ik}$, one, t( 1_${ik}$, n &
                           ), 1_${ik}$ )
                 ! c(i:m,i+1:n) = c(i:m,i+1:n) + alpha*c(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 do j = 1, n-i
                    a( i, i+j ) = a( i, i+j ) + alpha*(t( j, n ))
                 end do
                 call stdlib${ii}$_sger( p, n-i, alpha, b( 1_${ik}$, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, b( 1_${ik}$, i+1 ), ldb )
                           
              end if
           end do
           do i = 2, n
              ! t(1:i-1,i) := c(i:m,1:i-1)^h * (alpha * c(i:m,i))
              alpha = -t( i, 1_${ik}$ )
              do j = 1, i-1
                 t( j, i ) = zero
              end do
              p = min( i-1, l )
              mp = min( m-l+1, m )
              np = min( p+1, n )
              ! triangular part of b2
              do j = 1, p
                 t( j, i ) = alpha*b( m-l+j, i )
              end do
              call stdlib${ii}$_strmv( 'U', 'T', 'N', p, b( mp, 1_${ik}$ ), ldb,t( 1_${ik}$, i ), 1_${ik}$ )
              ! rectangular part of b2
              call stdlib${ii}$_sgemv( 'T', l, i-1-p, alpha, b( mp, np ), ldb,b( mp, i ), 1_${ik}$, zero, t( &
                        np, i ), 1_${ik}$ )
              ! b1
              call stdlib${ii}$_sgemv( 'T', m-l, i-1, alpha, b, ldb, b( 1_${ik}$, i ), 1_${ik}$,one, t( 1_${ik}$, i ), 1_${ik}$ )
                        
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_strmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
              ! t(i,i) = tau(i)
              t( i, i ) = t( i, 1_${ik}$ )
              t( i, 1_${ik}$ ) = zero
           end do
     end subroutine stdlib${ii}$_stpqrt2

     pure module subroutine stdlib${ii}$_dtpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info )
     !! DTPQRT2 computes a QR factorization of a real "triangular-pentagonal"
     !! matrix C, which is composed of a triangular block A and pentagonal block B,
     !! using the compact WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, p, mp, np
           real(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. l>min(m,n) ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTPQRT2', -info )
              return
           end if
           ! quick return if possible
           if( n==0 .or. m==0 ) return
           do i = 1, n
              ! generate elementary reflector h(i) to annihilate b(:,i)
              p = m-l+min( l, i )
              call stdlib${ii}$_dlarfg( p+1, a( i, i ), b( 1_${ik}$, i ), 1_${ik}$, t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! w(1:n-i) := c(i:m,i+1:n)^h * c(i:m,i) [use w = t(:,n)]
                 do j = 1, n-i
                    t( j, n ) = (a( i, i+j ))
                 end do
                 call stdlib${ii}$_dgemv( 'T', p, n-i, one, b( 1_${ik}$, i+1 ), ldb,b( 1_${ik}$, i ), 1_${ik}$, one, t( 1_${ik}$, n &
                           ), 1_${ik}$ )
                 ! c(i:m,i+1:n) = c(i:m,i+1:n) + alpha*c(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 do j = 1, n-i
                    a( i, i+j ) = a( i, i+j ) + alpha*(t( j, n ))
                 end do
                 call stdlib${ii}$_dger( p, n-i, alpha, b( 1_${ik}$, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, b( 1_${ik}$, i+1 ), ldb )
                           
              end if
           end do
           do i = 2, n
              ! t(1:i-1,i) := c(i:m,1:i-1)^h * (alpha * c(i:m,i))
              alpha = -t( i, 1_${ik}$ )
              do j = 1, i-1
                 t( j, i ) = zero
              end do
              p = min( i-1, l )
              mp = min( m-l+1, m )
              np = min( p+1, n )
              ! triangular part of b2
              do j = 1, p
                 t( j, i ) = alpha*b( m-l+j, i )
              end do
              call stdlib${ii}$_dtrmv( 'U', 'T', 'N', p, b( mp, 1_${ik}$ ), ldb,t( 1_${ik}$, i ), 1_${ik}$ )
              ! rectangular part of b2
              call stdlib${ii}$_dgemv( 'T', l, i-1-p, alpha, b( mp, np ), ldb,b( mp, i ), 1_${ik}$, zero, t( &
                        np, i ), 1_${ik}$ )
              ! b1
              call stdlib${ii}$_dgemv( 'T', m-l, i-1, alpha, b, ldb, b( 1_${ik}$, i ), 1_${ik}$,one, t( 1_${ik}$, i ), 1_${ik}$ )
                        
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_dtrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
              ! t(i,i) = tau(i)
              t( i, i ) = t( i, 1_${ik}$ )
              t( i, 1_${ik}$ ) = zero
           end do
     end subroutine stdlib${ii}$_dtpqrt2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info )
     !! DTPQRT2: computes a QR factorization of a real "triangular-pentagonal"
     !! matrix C, which is composed of a triangular block A and pentagonal block B,
     !! using the compact WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, p, mp, np
           real(${rk}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. l>min(m,n) ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTPQRT2', -info )
              return
           end if
           ! quick return if possible
           if( n==0 .or. m==0 ) return
           do i = 1, n
              ! generate elementary reflector h(i) to annihilate b(:,i)
              p = m-l+min( l, i )
              call stdlib${ii}$_${ri}$larfg( p+1, a( i, i ), b( 1_${ik}$, i ), 1_${ik}$, t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! w(1:n-i) := c(i:m,i+1:n)^h * c(i:m,i) [use w = t(:,n)]
                 do j = 1, n-i
                    t( j, n ) = (a( i, i+j ))
                 end do
                 call stdlib${ii}$_${ri}$gemv( 'T', p, n-i, one, b( 1_${ik}$, i+1 ), ldb,b( 1_${ik}$, i ), 1_${ik}$, one, t( 1_${ik}$, n &
                           ), 1_${ik}$ )
                 ! c(i:m,i+1:n) = c(i:m,i+1:n) + alpha*c(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 do j = 1, n-i
                    a( i, i+j ) = a( i, i+j ) + alpha*(t( j, n ))
                 end do
                 call stdlib${ii}$_${ri}$ger( p, n-i, alpha, b( 1_${ik}$, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, b( 1_${ik}$, i+1 ), ldb )
                           
              end if
           end do
           do i = 2, n
              ! t(1:i-1,i) := c(i:m,1:i-1)^h * (alpha * c(i:m,i))
              alpha = -t( i, 1_${ik}$ )
              do j = 1, i-1
                 t( j, i ) = zero
              end do
              p = min( i-1, l )
              mp = min( m-l+1, m )
              np = min( p+1, n )
              ! triangular part of b2
              do j = 1, p
                 t( j, i ) = alpha*b( m-l+j, i )
              end do
              call stdlib${ii}$_${ri}$trmv( 'U', 'T', 'N', p, b( mp, 1_${ik}$ ), ldb,t( 1_${ik}$, i ), 1_${ik}$ )
              ! rectangular part of b2
              call stdlib${ii}$_${ri}$gemv( 'T', l, i-1-p, alpha, b( mp, np ), ldb,b( mp, i ), 1_${ik}$, zero, t( &
                        np, i ), 1_${ik}$ )
              ! b1
              call stdlib${ii}$_${ri}$gemv( 'T', m-l, i-1, alpha, b, ldb, b( 1_${ik}$, i ), 1_${ik}$,one, t( 1_${ik}$, i ), 1_${ik}$ )
                        
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_${ri}$trmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
              ! t(i,i) = tau(i)
              t( i, i ) = t( i, 1_${ik}$ )
              t( i, 1_${ik}$ ) = zero
           end do
     end subroutine stdlib${ii}$_${ri}$tpqrt2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info )
     !! CTPQRT2 computes a QR factorization of a complex "triangular-pentagonal"
     !! matrix C, which is composed of a triangular block A and pentagonal block B,
     !! using the compact WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, p, mp, np
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. l>min(m,n) ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTPQRT2', -info )
              return
           end if
           ! quick return if possible
           if( n==0 .or. m==0 ) return
           do i = 1, n
              ! generate elementary reflector h(i) to annihilate b(:,i)
              p = m-l+min( l, i )
              call stdlib${ii}$_clarfg( p+1, a( i, i ), b( 1_${ik}$, i ), 1_${ik}$, t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! w(1:n-i) := c(i:m,i+1:n)**h * c(i:m,i) [use w = t(:,n)]
                 do j = 1, n-i
                    t( j, n ) = conjg(a( i, i+j ))
                 end do
                 call stdlib${ii}$_cgemv( 'C', p, n-i, cone, b( 1_${ik}$, i+1 ), ldb,b( 1_${ik}$, i ), 1_${ik}$, cone, t( 1_${ik}$, &
                           n ), 1_${ik}$ )
                 ! c(i:m,i+1:n) = c(i:m,i+1:n) + alpha*c(i:m,i)*w(1:n-1)**h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 do j = 1, n-i
                    a( i, i+j ) = a( i, i+j ) + alpha*conjg(t( j, n ))
                 end do
                 call stdlib${ii}$_cgerc( p, n-i, alpha, b( 1_${ik}$, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, b( 1_${ik}$, i+1 ), ldb )
                           
              end if
           end do
           do i = 2, n
              ! t(1:i-1,i) := c(i:m,1:i-1)**h * (alpha * c(i:m,i))
              alpha = -t( i, 1_${ik}$ )
              do j = 1, i-1
                 t( j, i ) = czero
              end do
              p = min( i-1, l )
              mp = min( m-l+1, m )
              np = min( p+1, n )
              ! triangular part of b2
              do j = 1, p
                 t( j, i ) = alpha*b( m-l+j, i )
              end do
              call stdlib${ii}$_ctrmv( 'U', 'C', 'N', p, b( mp, 1_${ik}$ ), ldb,t( 1_${ik}$, i ), 1_${ik}$ )
              ! rectangular part of b2
              call stdlib${ii}$_cgemv( 'C', l, i-1-p, alpha, b( mp, np ), ldb,b( mp, i ), 1_${ik}$, czero, t( &
                        np, i ), 1_${ik}$ )
              ! b1
              call stdlib${ii}$_cgemv( 'C', m-l, i-1, alpha, b, ldb, b( 1_${ik}$, i ), 1_${ik}$,cone, t( 1_${ik}$, i ), 1_${ik}$ )
                        
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_ctrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
              ! t(i,i) = tau(i)
              t( i, i ) = t( i, 1_${ik}$ )
              t( i, 1_${ik}$ ) = czero
           end do
     end subroutine stdlib${ii}$_ctpqrt2

     pure module subroutine stdlib${ii}$_ztpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info )
     !! ZTPQRT2 computes a QR factorization of a complex "triangular-pentagonal"
     !! matrix C, which is composed of a triangular block A and pentagonal block B,
     !! using the compact WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, p, mp, np
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. l>min(m,n) ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTPQRT2', -info )
              return
           end if
           ! quick return if possible
           if( n==0 .or. m==0 ) return
           do i = 1, n
              ! generate elementary reflector h(i) to annihilate b(:,i)
              p = m-l+min( l, i )
              call stdlib${ii}$_zlarfg( p+1, a( i, i ), b( 1_${ik}$, i ), 1_${ik}$, t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! w(1:n-i) := c(i:m,i+1:n)**h * c(i:m,i) [use w = t(:,n)]
                 do j = 1, n-i
                    t( j, n ) = conjg(a( i, i+j ))
                 end do
                 call stdlib${ii}$_zgemv( 'C', p, n-i, cone, b( 1_${ik}$, i+1 ), ldb,b( 1_${ik}$, i ), 1_${ik}$, cone, t( 1_${ik}$, &
                           n ), 1_${ik}$ )
                 ! c(i:m,i+1:n) = c(i:m,i+1:n) + alpha*c(i:m,i)*w(1:n-1)**h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 do j = 1, n-i
                    a( i, i+j ) = a( i, i+j ) + alpha*conjg(t( j, n ))
                 end do
                 call stdlib${ii}$_zgerc( p, n-i, alpha, b( 1_${ik}$, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, b( 1_${ik}$, i+1 ), ldb )
                           
              end if
           end do
           do i = 2, n
              ! t(1:i-1,i) := c(i:m,1:i-1)**h * (alpha * c(i:m,i))
              alpha = -t( i, 1_${ik}$ )
              do j = 1, i-1
                 t( j, i ) = czero
              end do
              p = min( i-1, l )
              mp = min( m-l+1, m )
              np = min( p+1, n )
              ! triangular part of b2
              do j = 1, p
                 t( j, i ) = alpha*b( m-l+j, i )
              end do
              call stdlib${ii}$_ztrmv( 'U', 'C', 'N', p, b( mp, 1_${ik}$ ), ldb,t( 1_${ik}$, i ), 1_${ik}$ )
              ! rectangular part of b2
              call stdlib${ii}$_zgemv( 'C', l, i-1-p, alpha, b( mp, np ), ldb,b( mp, i ), 1_${ik}$, czero, t( &
                        np, i ), 1_${ik}$ )
              ! b1
              call stdlib${ii}$_zgemv( 'C', m-l, i-1, alpha, b, ldb, b( 1_${ik}$, i ), 1_${ik}$,cone, t( 1_${ik}$, i ), 1_${ik}$ )
                        
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_ztrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
              ! t(i,i) = tau(i)
              t( i, i ) = t( i, 1_${ik}$ )
              t( i, 1_${ik}$ ) = czero
           end do
     end subroutine stdlib${ii}$_ztpqrt2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tpqrt2( m, n, l, a, lda, b, ldb, t, ldt, info )
     !! ZTPQRT2: computes a QR factorization of a complex "triangular-pentagonal"
     !! matrix C, which is composed of a triangular block A and pentagonal block B,
     !! using the compact WY representation for Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldt, n, m, l
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, p, mp, np
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( l<0_${ik}$ .or. l>min(m,n) ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTPQRT2', -info )
              return
           end if
           ! quick return if possible
           if( n==0 .or. m==0 ) return
           do i = 1, n
              ! generate elementary reflector h(i) to annihilate b(:,i)
              p = m-l+min( l, i )
              call stdlib${ii}$_${ci}$larfg( p+1, a( i, i ), b( 1_${ik}$, i ), 1_${ik}$, t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! w(1:n-i) := c(i:m,i+1:n)**h * c(i:m,i) [use w = t(:,n)]
                 do j = 1, n-i
                    t( j, n ) = conjg(a( i, i+j ))
                 end do
                 call stdlib${ii}$_${ci}$gemv( 'C', p, n-i, cone, b( 1_${ik}$, i+1 ), ldb,b( 1_${ik}$, i ), 1_${ik}$, cone, t( 1_${ik}$, &
                           n ), 1_${ik}$ )
                 ! c(i:m,i+1:n) = c(i:m,i+1:n) + alpha*c(i:m,i)*w(1:n-1)**h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 do j = 1, n-i
                    a( i, i+j ) = a( i, i+j ) + alpha*conjg(t( j, n ))
                 end do
                 call stdlib${ii}$_${ci}$gerc( p, n-i, alpha, b( 1_${ik}$, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, b( 1_${ik}$, i+1 ), ldb )
                           
              end if
           end do
           do i = 2, n
              ! t(1:i-1,i) := c(i:m,1:i-1)**h * (alpha * c(i:m,i))
              alpha = -t( i, 1_${ik}$ )
              do j = 1, i-1
                 t( j, i ) = czero
              end do
              p = min( i-1, l )
              mp = min( m-l+1, m )
              np = min( p+1, n )
              ! triangular part of b2
              do j = 1, p
                 t( j, i ) = alpha*b( m-l+j, i )
              end do
              call stdlib${ii}$_${ci}$trmv( 'U', 'C', 'N', p, b( mp, 1_${ik}$ ), ldb,t( 1_${ik}$, i ), 1_${ik}$ )
              ! rectangular part of b2
              call stdlib${ii}$_${ci}$gemv( 'C', l, i-1-p, alpha, b( mp, np ), ldb,b( mp, i ), 1_${ik}$, czero, t( &
                        np, i ), 1_${ik}$ )
              ! b1
              call stdlib${ii}$_${ci}$gemv( 'C', m-l, i-1, alpha, b, ldb, b( 1_${ik}$, i ), 1_${ik}$,cone, t( 1_${ik}$, i ), 1_${ik}$ )
                        
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_${ci}$trmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
              ! t(i,i) = tau(i)
              t( i, i ) = t( i, 1_${ik}$ )
              t( i, 1_${ik}$ ) = czero
           end do
     end subroutine stdlib${ii}$_${ci}$tpqrt2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, &
     !! STPMQRT applies a real orthogonal matrix Q obtained from a
     !! "triangular-pentagonal" real block reflector H to a general
     !! real matrix C, which consists of two blocks A and B.
               work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt
           ! Array Arguments 
           real(sp), intent(in) :: v(ldv,*), t(ldt,*)
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, mb, lb, kf, ldaq, ldvq
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if ( left ) then
              ldvq = max( 1_${ik}$, m )
              ldaq = max( 1_${ik}$, k )
           else if ( right ) then
              ldvq = max( 1_${ik}$, n )
              ldaq = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ ) then
              info = -5_${ik}$
           else if( l<0_${ik}$ .or. l>k ) then
              info = -6_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$) ) then
              info = -7_${ik}$
           else if( ldv<ldvq ) then
              info = -9_${ik}$
           else if( ldt<nb ) then
              info = -11_${ik}$
           else if( lda<ldaq ) then
              info = -13_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STPMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_stprfb( 'L', 'T', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_stprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_stprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_stprfb( 'R', 'T', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           end if
           return
     end subroutine stdlib${ii}$_stpmqrt

     pure module subroutine stdlib${ii}$_dtpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, &
     !! DTPMQRT applies a real orthogonal matrix Q obtained from a
     !! "triangular-pentagonal" real block reflector H to a general
     !! real matrix C, which consists of two blocks A and B.
               work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt
           ! Array Arguments 
           real(dp), intent(in) :: v(ldv,*), t(ldt,*)
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, mb, lb, kf, ldaq, ldvq
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if ( left ) then
              ldvq = max( 1_${ik}$, m )
              ldaq = max( 1_${ik}$, k )
           else if ( right ) then
              ldvq = max( 1_${ik}$, n )
              ldaq = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ ) then
              info = -5_${ik}$
           else if( l<0_${ik}$ .or. l>k ) then
              info = -6_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$) ) then
              info = -7_${ik}$
           else if( ldv<ldvq ) then
              info = -9_${ik}$
           else if( ldt<nb ) then
              info = -11_${ik}$
           else if( lda<ldaq ) then
              info = -13_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTPMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_dtprfb( 'L', 'T', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_dtprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_dtprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_dtprfb( 'R', 'T', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           end if
           return
     end subroutine stdlib${ii}$_dtpmqrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, &
     !! DTPMQRT: applies a real orthogonal matrix Q obtained from a
     !! "triangular-pentagonal" real block reflector H to a general
     !! real matrix C, which consists of two blocks A and B.
               work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt
           ! Array Arguments 
           real(${rk}$), intent(in) :: v(ldv,*), t(ldt,*)
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, mb, lb, kf, ldaq, ldvq
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if ( left ) then
              ldvq = max( 1_${ik}$, m )
              ldaq = max( 1_${ik}$, k )
           else if ( right ) then
              ldvq = max( 1_${ik}$, n )
              ldaq = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ ) then
              info = -5_${ik}$
           else if( l<0_${ik}$ .or. l>k ) then
              info = -6_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$) ) then
              info = -7_${ik}$
           else if( ldv<ldvq ) then
              info = -9_${ik}$
           else if( ldt<nb ) then
              info = -11_${ik}$
           else if( lda<ldaq ) then
              info = -13_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTPMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_${ri}$tprfb( 'L', 'T', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_${ri}$tprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_${ri}$tprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_${ri}$tprfb( 'R', 'T', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$tpmqrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, &
     !! CTPMQRT applies a complex orthogonal matrix Q obtained from a
     !! "triangular-pentagonal" complex block reflector H to a general
     !! complex matrix C, which consists of two blocks A and B.
               work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt
           ! Array Arguments 
           complex(sp), intent(in) :: v(ldv,*), t(ldt,*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, mb, lb, kf, ldaq, ldvq
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if ( left ) then
              ldvq = max( 1_${ik}$, m )
              ldaq = max( 1_${ik}$, k )
           else if ( right ) then
              ldvq = max( 1_${ik}$, n )
              ldaq = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ ) then
              info = -5_${ik}$
           else if( l<0_${ik}$ .or. l>k ) then
              info = -6_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$) ) then
              info = -7_${ik}$
           else if( ldv<ldvq ) then
              info = -9_${ik}$
           else if( ldt<nb ) then
              info = -11_${ik}$
           else if( lda<ldaq ) then
              info = -13_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTPMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_ctprfb( 'L', 'C', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_ctprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_ctprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_ctprfb( 'R', 'C', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           end if
           return
     end subroutine stdlib${ii}$_ctpmqrt

     pure module subroutine stdlib${ii}$_ztpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, &
     !! ZTPMQRT applies a complex orthogonal matrix Q obtained from a
     !! "triangular-pentagonal" complex block reflector H to a general
     !! complex matrix C, which consists of two blocks A and B.
               work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt
           ! Array Arguments 
           complex(dp), intent(in) :: v(ldv,*), t(ldt,*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, mb, lb, kf, ldaq, ldvq
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if ( left ) then
              ldvq = max( 1_${ik}$, m )
              ldaq = max( 1_${ik}$, k )
           else if ( right ) then
              ldvq = max( 1_${ik}$, n )
              ldaq = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ ) then
              info = -5_${ik}$
           else if( l<0_${ik}$ .or. l>k ) then
              info = -6_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$) ) then
              info = -7_${ik}$
           else if( ldv<ldvq ) then
              info = -9_${ik}$
           else if( ldt<nb ) then
              info = -11_${ik}$
           else if( lda<ldaq ) then
              info = -13_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTPMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_ztprfb( 'L', 'C', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_ztprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_ztprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_ztprfb( 'R', 'C', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           end if
           return
     end subroutine stdlib${ii}$_ztpmqrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tpmqrt( side, trans, m, n, k, l, nb, v, ldv, t, ldt,a, lda, b, ldb, &
     !! ZTPMQRT: applies a complex orthogonal matrix Q obtained from a
     !! "triangular-pentagonal" complex block reflector H to a general
     !! complex matrix C, which consists of two blocks A and B.
               work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, lda, ldb, m, n, l, nb, ldt
           ! Array Arguments 
           complex(${ck}$), intent(in) :: v(ldv,*), t(ldt,*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, mb, lb, kf, ldaq, ldvq
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if ( left ) then
              ldvq = max( 1_${ik}$, m )
              ldaq = max( 1_${ik}$, k )
           else if ( right ) then
              ldvq = max( 1_${ik}$, n )
              ldaq = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ ) then
              info = -5_${ik}$
           else if( l<0_${ik}$ .or. l>k ) then
              info = -6_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$) ) then
              info = -7_${ik}$
           else if( ldv<ldvq ) then
              info = -9_${ik}$
           else if( ldt<nb ) then
              info = -11_${ik}$
           else if( lda<ldaq ) then
              info = -13_${ik}$
           else if( ldb<max( 1_${ik}$, m ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTPMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_${ci}$tprfb( 'L', 'C', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_${ci}$tprfb( 'R', 'N', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( m-l+i+ib-1, m )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-m+l-i+1
                 end if
                 call stdlib${ii}$_${ci}$tprfb( 'L', 'N', 'F', 'C', mb, n, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( i, 1_${ik}$ ), lda, b, ldb, work, ib )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 mb = min( n-l+i+ib-1, n )
                 if( i>=l ) then
                    lb = 0_${ik}$
                 else
                    lb = mb-n+l-i+1
                 end if
                 call stdlib${ii}$_${ci}$tprfb( 'R', 'C', 'F', 'C', m, mb, ib, lb,v( 1_${ik}$, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,a( 1_${ik}$, i ), lda, b, ldb, work, m )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ci}$tpmqrt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, &
     !! STPRFB applies a real "triangular-pentagonal" block reflector H or its
     !! conjugate transpose H^H to a real matrix C, which is composed of two
     !! blocks A and B, either from the left or right.
               lda, b, ldb, work, ldwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: direct, side, storev, trans
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(in) :: t(ldt,*), v(ldv,*)
           real(sp), intent(out) :: work(ldwork,*)
        ! ==========================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
           ! Executable Statements 
           ! quick return if possible
           if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return
           if( stdlib_lsame( storev, 'C' ) ) then
              column = .true.
              row = .false.
           else if ( stdlib_lsame( storev, 'R' ) ) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if( stdlib_lsame( side, 'L' ) ) then
              left = .true.
              right = .false.
           else if( stdlib_lsame( side, 'R' ) ) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if( stdlib_lsame( direct, 'F' ) ) then
              forward = .true.
              backward = .false.
           else if( stdlib_lsame( direct, 'B' ) ) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if( column .and. forward .and. left  ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h^h c  where  c = [ a ]  (k-by-n)
                                               ! [ b ]  (m-by-n)
              ! h = i - w t w^h          or  h^h = i - w t^h w^h
              ! a = a -   t (a + v^h b)  or  a = a -   t^h (a + v^h b)
              ! b = b - v t (a + v^h b)  or  b = b - v t^h (a + v^h b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'U', 'T', 'N', l, n, one, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'T', 'N', l, n, m-l, one, v, ldv, b, ldb,one, work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'T', 'N', k-l, n, m, one, v( 1_${ik}$, kp ), ldv,b, ldb, zero, work( kp,&
                         1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'N', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', l, n, k-l, -one, v( mp, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, one, b( mp, 1_${ik}$ ),  ldb )
              call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', l, n, one, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h^h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w^h          or  h^h = i - w t^h w^h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t^h
              ! b = b - (a + b v) t v^h  or  b = b - (a + b v) t^h v^h
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'U', 'N', 'N', m, l, one, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', m, l, n-l, one, b, ldb,v, ldv, one, work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', m, k-l, n, one, b, ldb,v( 1_${ik}$, kp ), ldv, zero, work( 1_${ik}$, &
                        kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_sgemm( 'N', 'T', m, l, k-l, -one, work( 1_${ik}$, kp ), ldwork,v( np, kp ), &
                        ldv, one, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_strmm( 'R', 'U', 'T', 'N', m, l, one, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h^h c  where  c = [ b ]  (m-by-n)
                                               ! [ a ]  (k-by-n)
              ! h = i - w t w^h          or  h^h = i - w t^h w^h
              ! a = a -   t (a + v^h b)  or  a = a -   t^h (a + v^h b)
              ! b = b - v t (a + v^h b)  or  b = b - v t^h (a + v^h b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'L', 'T', 'N', l, n, one, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_sgemm( 'T', 'N', l, n, m-l, one, v( mp, kp ), ldv,b( mp, 1_${ik}$ ), ldb, one, &
                        work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_sgemm( 'T', 'N', k-l, n, m, one, v, ldv,b, ldb, zero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'L', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'N', 'N', m-l, n, k, -one, v( mp, 1_${ik}$ ), ldv,work, ldwork, one, b( &
                        mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_sgemm( 'N', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b,  ldb )
                        
              call stdlib${ii}$_strmm( 'L', 'L', 'N', 'N', l, n, one, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h^h  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w^h          or  h^h = i - w t^h w^h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t^h
              ! b = b - (a + b v) t v^h  or  b = b - (a + b v) t^h v^h
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'L', 'N', 'N', m, l, one, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_sgemm( 'N', 'N', m, l, n-l, one, b( 1_${ik}$, np ), ldb,v( np, kp ), ldv, one, &
                        work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_sgemm( 'N', 'N', m, k-l, n, one, b, ldb,v, ldv, zero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v( np, 1_${ik}$ ), ldv, one, b( &
                        1_${ik}$, np ), ldb )
              call stdlib${ii}$_sgemm( 'N', 'T', m, l, k-l, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_strmm( 'R', 'L', 'T', 'N', m, l, one, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h^h c  where  c = [ a ]  (k-by-n)
                                               ! [ b ]  (m-by-n)
              ! h = i - w^h t w          or  h^h = i - w^h t^h w
              ! a = a -     t (a + v b)  or  a = a -     t^h (a + v b)
              ! b = b - v^h t (a + v b)  or  b = b - v^h t^h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'L', 'N', 'N', l, n, one, v( 1_${ik}$, mp ), ldv,work, ldb )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', l, n, m-l, one, v, ldv,b, ldb,one, work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', k-l, n, m, one, v( kp, 1_${ik}$ ), ldv,b, ldb, zero, work( kp,&
                         1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'T', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb )
                        
              call stdlib${ii}$_sgemm( 'T', 'N', l, n, k-l, -one, v( kp, mp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, one, b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_strmm( 'L', 'L', 'T', 'N', l, n, one, v( 1_${ik}$, mp ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h^h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w^h t w            or  h^h = i - w^h t^h w
              ! a = a - (a + b v^h) t      or  a = a - (a + b v^h) t^h
              ! b = b - (a + b v^h) t v    or  b = b - (a + b v^h) t^h v
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'L', 'T', 'N', m, l, one, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'N', 'T', m, l, n-l, one, b, ldb, v, ldv,one, work, ldwork )
                        
              call stdlib${ii}$_sgemm( 'N', 'T', m, k-l, n, one, b, ldb,v( kp, 1_${ik}$ ), ldv, zero, work( 1_${ik}$, &
                        kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', m, l, k-l, -one, work( 1_${ik}$, kp ), ldwork,v( kp, np ), &
                        ldv, one, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_strmm( 'R', 'L', 'N', 'N', m, l, one, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h^h c  where  c = [ b ]  (m-by-n)
                                               ! [ a ]  (k-by-n)
              ! h = i - w^h t w          or  h^h = i - w^h t^h w
              ! a = a -     t (a + v b)  or  a = a -     t^h (a + v b)
              ! b = b - v^h t (a + v b)  or  b = b - v^h t^h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', l, n, one, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_sgemm( 'N', 'N', l, n, m-l, one, v( kp, mp ), ldv,b( mp, 1_${ik}$ ), ldb, one, &
                        work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_sgemm( 'N', 'N', k-l, n, m, one, v, ldv, b, ldb,zero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'L ', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'T', 'N', m-l, n, k, -one, v( 1_${ik}$, mp ), ldv,work, ldwork, one, b( &
                        mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_sgemm( 'T', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb )
                        
              call stdlib${ii}$_strmm( 'L', 'U', 'T', 'N', l, n, one, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h^h  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w^h t w            or  h^h = i - w^h t^h w
              ! a = a - (a + b v^h) t      or  a = a - (a + b v^h) t^h
              ! b = b - (a + b v^h) t v    or  b = b - (a + b v^h) t^h v
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'U', 'T', 'N', m, l, one, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_sgemm( 'N', 'T', m, l, n-l, one, b( 1_${ik}$, np ), ldb,v( kp, np ), ldv, one, &
                        work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_sgemm( 'N', 'T', m, k-l, n, one, b, ldb, v, ldv,zero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_sgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v( 1_${ik}$, np ), ldv, one, b( &
                        1_${ik}$, np ), ldb )
              call stdlib${ii}$_sgemm( 'N', 'N', m, l, k-l , -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_strmm( 'R', 'U', 'N', 'N', m, l, one, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
           end if
           return
     end subroutine stdlib${ii}$_stprfb

     pure module subroutine stdlib${ii}$_dtprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, &
     !! DTPRFB applies a real "triangular-pentagonal" block reflector H or its
     !! transpose H**T to a real matrix C, which is composed of two
     !! blocks A and B, either from the left or right.
               lda, b, ldb, work, ldwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: direct, side, storev, trans
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(in) :: t(ldt,*), v(ldv,*)
           real(dp), intent(out) :: work(ldwork,*)
        ! ==========================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
           ! Executable Statements 
           ! quick return if possible
           if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return
           if( stdlib_lsame( storev, 'C' ) ) then
              column = .true.
              row = .false.
           else if ( stdlib_lsame( storev, 'R' ) ) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if( stdlib_lsame( side, 'L' ) ) then
              left = .true.
              right = .false.
           else if( stdlib_lsame( side, 'R' ) ) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if( stdlib_lsame( direct, 'F' ) ) then
              forward = .true.
              backward = .false.
           else if( stdlib_lsame( direct, 'B' ) ) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if( column .and. forward .and. left  ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h**t c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a -   t (a + v**t b)  or  a = a -   t**t (a + v**t b)
              ! b = b - v t (a + v**t b)  or  b = b - v t**t (a + v**t b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'U', 'T', 'N', l, n, one, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'T', 'N', l, n, m-l, one, v, ldv, b, ldb,one, work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'T', 'N', k-l, n, m, one, v( 1_${ik}$, kp ), ldv,b, ldb, zero, work( kp,&
                         1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'N', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', l, n, k-l, -one, v( mp, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, one, b( mp, 1_${ik}$ ),  ldb )
              call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', l, n, one, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h**t  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**t
              ! b = b - (a + b v) t v**t  or  b = b - (a + b v) t**t v**t
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'U', 'N', 'N', m, l, one, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', m, l, n-l, one, b, ldb,v, ldv, one, work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', m, k-l, n, one, b, ldb,v( 1_${ik}$, kp ), ldv, zero, work( 1_${ik}$, &
                        kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_dgemm( 'N', 'T', m, l, k-l, -one, work( 1_${ik}$, kp ), ldwork,v( np, kp ), &
                        ldv, one, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_dtrmm( 'R', 'U', 'T', 'N', m, l, one, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h**t c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a -   t (a + v**t b)  or  a = a -   t**t (a + v**t b)
              ! b = b - v t (a + v**t b)  or  b = b - v t**t (a + v**t b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'N', l, n, one, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_dgemm( 'T', 'N', l, n, m-l, one, v( mp, kp ), ldv,b( mp, 1_${ik}$ ), ldb, one, &
                        work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_dgemm( 'T', 'N', k-l, n, m, one, v, ldv,b, ldb, zero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'L', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'N', 'N', m-l, n, k, -one, v( mp, 1_${ik}$ ), ldv,work, ldwork, one, b( &
                        mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_dgemm( 'N', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b,  ldb )
                        
              call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'N', l, n, one, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h**t  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**t
              ! b = b - (a + b v) t v**t  or  b = b - (a + b v) t**t v**t
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'L', 'N', 'N', m, l, one, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_dgemm( 'N', 'N', m, l, n-l, one, b( 1_${ik}$, np ), ldb,v( np, kp ), ldv, one, &
                        work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_dgemm( 'N', 'N', m, k-l, n, one, b, ldb,v, ldv, zero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v( np, 1_${ik}$ ), ldv, one, b( &
                        1_${ik}$, np ), ldb )
              call stdlib${ii}$_dgemm( 'N', 'T', m, l, k-l, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_dtrmm( 'R', 'L', 'T', 'N', m, l, one, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**t c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w**t t w          or  h**t = i - w**t t**t w
              ! a = a -     t (a + v b)  or  a = a -     t**t (a + v b)
              ! b = b - v**t t (a + v b)  or  b = b - v**t t**t (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'N', l, n, one, v( 1_${ik}$, mp ), ldv,work, ldb )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', l, n, m-l, one, v, ldv,b, ldb,one, work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', k-l, n, m, one, v( kp, 1_${ik}$ ), ldv,b, ldb, zero, work( kp,&
                         1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'T', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb )
                        
              call stdlib${ii}$_dgemm( 'T', 'N', l, n, k-l, -one, v( kp, mp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, one, b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'N', l, n, one, v( 1_${ik}$, mp ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**t  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w**t t w            or  h**t = i - w**t t**t w
              ! a = a - (a + b v**t) t      or  a = a - (a + b v**t) t**t
              ! b = b - (a + b v**t) t v    or  b = b - (a + b v**t) t**t v
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'L', 'T', 'N', m, l, one, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'N', 'T', m, l, n-l, one, b, ldb, v, ldv,one, work, ldwork )
                        
              call stdlib${ii}$_dgemm( 'N', 'T', m, k-l, n, one, b, ldb,v( kp, 1_${ik}$ ), ldv, zero, work( 1_${ik}$, &
                        kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', m, l, k-l, -one, work( 1_${ik}$, kp ), ldwork,v( kp, np ), &
                        ldv, one, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_dtrmm( 'R', 'L', 'N', 'N', m, l, one, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**t c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w**t t w          or  h**t = i - w**t t**t w
              ! a = a -     t (a + v b)  or  a = a -     t**t (a + v b)
              ! b = b - v**t t (a + v b)  or  b = b - v**t t**t (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', l, n, one, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_dgemm( 'N', 'N', l, n, m-l, one, v( kp, mp ), ldv,b( mp, 1_${ik}$ ), ldb, one, &
                        work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_dgemm( 'N', 'N', k-l, n, m, one, v, ldv, b, ldb,zero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'L ', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'T', 'N', m-l, n, k, -one, v( 1_${ik}$, mp ), ldv,work, ldwork, one, b( &
                        mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_dgemm( 'T', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb )
                        
              call stdlib${ii}$_dtrmm( 'L', 'U', 'T', 'N', l, n, one, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**t  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w**t t w            or  h**t = i - w**t t**t w
              ! a = a - (a + b v**t) t      or  a = a - (a + b v**t) t**t
              ! b = b - (a + b v**t) t v    or  b = b - (a + b v**t) t**t v
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'U', 'T', 'N', m, l, one, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_dgemm( 'N', 'T', m, l, n-l, one, b( 1_${ik}$, np ), ldb,v( kp, np ), ldv, one, &
                        work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_dgemm( 'N', 'T', m, k-l, n, one, b, ldb, v, ldv,zero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_dgemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v( 1_${ik}$, np ), ldv, one, b( &
                        1_${ik}$, np ), ldb )
              call stdlib${ii}$_dgemm( 'N', 'N', m, l, k-l , -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_dtrmm( 'R', 'U', 'N', 'N', m, l, one, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
           end if
           return
     end subroutine stdlib${ii}$_dtprfb

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, &
     !! DTPRFB: applies a real "triangular-pentagonal" block reflector H or its
     !! transpose H**T to a real matrix C, which is composed of two
     !! blocks A and B, either from the left or right.
               lda, b, ldb, work, ldwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: direct, side, storev, trans
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(in) :: t(ldt,*), v(ldv,*)
           real(${rk}$), intent(out) :: work(ldwork,*)
        ! ==========================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
           ! Executable Statements 
           ! quick return if possible
           if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return
           if( stdlib_lsame( storev, 'C' ) ) then
              column = .true.
              row = .false.
           else if ( stdlib_lsame( storev, 'R' ) ) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if( stdlib_lsame( side, 'L' ) ) then
              left = .true.
              right = .false.
           else if( stdlib_lsame( side, 'R' ) ) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if( stdlib_lsame( direct, 'F' ) ) then
              forward = .true.
              backward = .false.
           else if( stdlib_lsame( direct, 'B' ) ) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if( column .and. forward .and. left  ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h**t c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a -   t (a + v**t b)  or  a = a -   t**t (a + v**t b)
              ! b = b - v t (a + v**t b)  or  b = b - v t**t (a + v**t b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'T', 'N', l, n, one, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', l, n, m-l, one, v, ldv, b, ldb,one, work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', k-l, n, m, one, v( 1_${ik}$, kp ), ldv,b, ldb, zero, work( kp,&
                         1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', l, n, k-l, -one, v( mp, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, one, b( mp, 1_${ik}$ ),  ldb )
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', l, n, one, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h**t  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**t
              ! b = b - (a + b v) t v**t  or  b = b - (a + b v) t**t v**t
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'N', 'N', m, l, one, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, l, n-l, one, b, ldb,v, ldv, one, work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, k-l, n, one, b, ldb,v( 1_${ik}$, kp ), ldv, zero, work( 1_${ik}$, &
                        kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, l, k-l, -one, work( 1_${ik}$, kp ), ldwork,v( np, kp ), &
                        ldv, one, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'T', 'N', m, l, one, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h**t c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a -   t (a + v**t b)  or  a = a -   t**t (a + v**t b)
              ! b = b - v t (a + v**t b)  or  b = b - v t**t (a + v**t b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'N', l, n, one, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', l, n, m-l, one, v( mp, kp ), ldv,b( mp, 1_${ik}$ ), ldb, one, &
                        work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', k-l, n, m, one, v, ldv,b, ldb, zero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m-l, n, k, -one, v( mp, 1_${ik}$ ), ldv,work, ldwork, one, b( &
                        mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b,  ldb )
                        
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'N', l, n, one, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h**t  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**t
              ! b = b - (a + b v) t v**t  or  b = b - (a + b v) t**t v**t
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', 'N', 'N', m, l, one, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, l, n-l, one, b( 1_${ik}$, np ), ldb,v( np, kp ), ldv, one, &
                        work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, k-l, n, one, b, ldb,v, ldv, zero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, n-l, k, -one, work, ldwork,v( np, 1_${ik}$ ), ldv, one, b( &
                        1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, l, k-l, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', 'T', 'N', m, l, one, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**t c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w**t t w          or  h**t = i - w**t t**t w
              ! a = a -     t (a + v b)  or  a = a -     t**t (a + v b)
              ! b = b - v**t t (a + v b)  or  b = b - v**t t**t (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'N', l, n, one, v( 1_${ik}$, mp ), ldv,work, ldb )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', l, n, m-l, one, v, ldv,b, ldb,one, work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', k-l, n, m, one, v( kp, 1_${ik}$ ), ldv,b, ldb, zero, work( kp,&
                         1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', m-l, n, k, -one, v, ldv, work, ldwork,one, b, ldb )
                        
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', l, n, k-l, -one, v( kp, mp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, one, b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'N', l, n, one, v( 1_${ik}$, mp ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**t  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w**t t w            or  h**t = i - w**t t**t w
              ! a = a - (a + b v**t) t      or  a = a - (a + b v**t) t**t
              ! b = b - (a + b v**t) t v    or  b = b - (a + b v**t) t**t v
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', 'T', 'N', m, l, one, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, l, n-l, one, b, ldb, v, ldv,one, work, ldwork )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, k-l, n, one, b, ldb,v( kp, 1_${ik}$ ), ldv, zero, work( 1_${ik}$, &
                        kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, l, k-l, -one, work( 1_${ik}$, kp ), ldwork,v( kp, np ), &
                        ldv, one, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', 'N', 'N', m, l, one, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**t c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w**t t w          or  h**t = i - w**t t**t w
              ! a = a -     t (a + v b)  or  a = a -     t**t (a + v b)
              ! b = b - v**t t (a + v b)  or  b = b - v**t t**t (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', l, n, one, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', l, n, m-l, one, v( kp, mp ), ldv,b( mp, 1_${ik}$ ), ldb, one, &
                        work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', k-l, n, m, one, v, ldv, b, ldb,zero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'L ', trans, 'N', k, n, one, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', m-l, n, k, -one, v( 1_${ik}$, mp ), ldv,work, ldwork, one, b( &
                        mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', l, n, k-l, -one, v, ldv,work, ldwork, one, b, ldb )
                        
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'T', 'N', l, n, one, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**t  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w**t t w            or  h**t = i - w**t t**t w
              ! a = a - (a + b v**t) t      or  a = a - (a + b v**t) t**t
              ! b = b - (a + b v**t) t v    or  b = b - (a + b v**t) t**t v
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'T', 'N', m, l, one, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, l, n-l, one, b( 1_${ik}$, np ), ldb,v( kp, np ), ldv, one, &
                        work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, k-l, n, one, b, ldb, v, ldv,zero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', trans, 'N', m, k, one, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n-l, k, -one, work, ldwork,v( 1_${ik}$, np ), ldv, one, b( &
                        1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, l, k-l , -one, work, ldwork,v, ldv, one, b, ldb )
                        
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'N', 'N', m, l, one, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$tprfb

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, &
     !! CTPRFB applies a complex "triangular-pentagonal" block reflector H or its
     !! conjugate transpose H**H to a complex matrix C, which is composed of two
     !! blocks A and B, either from the left or right.
               lda, b, ldb, work, ldwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: direct, side, storev, trans
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(in) :: t(ldt,*), v(ldv,*)
           complex(sp), intent(out) :: work(ldwork,*)
        ! ==========================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return
           if( stdlib_lsame( storev, 'C' ) ) then
              column = .true.
              row = .false.
           else if ( stdlib_lsame( storev, 'R' ) ) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if( stdlib_lsame( side, 'L' ) ) then
              left = .true.
              right = .false.
           else if( stdlib_lsame( side, 'R' ) ) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if( stdlib_lsame( direct, 'F' ) ) then
              forward = .true.
              backward = .false.
           else if( stdlib_lsame( direct, 'B' ) ) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if( column .and. forward .and. left  ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h**h c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a -   t (a + v**h b)  or  a = a -   t**h (a + v**h b)
              ! b = b - v t (a + v**h b)  or  b = b - v t**h (a + v**h b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'U', 'C', 'N', l, n, cone, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'C', 'N', l, n, m-l, cone, v, ldv, b, ldb,cone, work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'C', 'N', k-l, n, m, cone, v( 1_${ik}$, kp ), ldv,b, ldb, czero, work( &
                        kp, 1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'N', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', l, n, k-l, -cone, v( mp, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, cone, b( mp, 1_${ik}$ ),  ldb )
              call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', l, n, cone, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h**h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**h
              ! b = b - (a + b v) t v**h  or  b = b - (a + b v) t**h v**h
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'U', 'N', 'N', m, l, cone, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', m, l, n-l, cone, b, ldb,v, ldv, cone, work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v( 1_${ik}$, kp ), ldv, czero, work( &
                        1_${ik}$, kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_cgemm( 'N', 'C', m, l, k-l, -cone, work( 1_${ik}$, kp ), ldwork,v( np, kp ), &
                        ldv, cone, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_ctrmm( 'R', 'U', 'C', 'N', m, l, cone, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h**h c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a -   t (a + v**h b)  or  a = a -   t**h (a + v**h b)
              ! b = b - v t (a + v**h b)  or  b = b - v t**h (a + v**h b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_cgemm( 'C', 'N', l, n, m-l, cone, v( mp, kp ), ldv,b( mp, 1_${ik}$ ), ldb, &
                        cone, work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_cgemm( 'C', 'N', k-l, n, m, cone, v, ldv,b, ldb, czero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'L', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'N', 'N', m-l, n, k, -cone, v( mp, 1_${ik}$ ), ldv,work, ldwork, cone, &
                        b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_cgemm( 'N', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b,  ldb )
                        
              call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h**h  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**h
              ! b = b - (a + b v) t v**h  or  b = b - (a + b v) t**h v**h
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_cgemm( 'N', 'N', m, l, n-l, cone, b( 1_${ik}$, np ), ldb,v( np, kp ), ldv, &
                        cone, work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_cgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v, ldv, czero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v( np, 1_${ik}$ ), ldv, cone, &
                        b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_cgemm( 'N', 'C', m, l, k-l, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_ctrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**h c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w**h t w          or  h**h = i - w**h t**h w
              ! a = a -     t (a + v b)  or  a = a -     t**h (a + v b)
              ! b = b - v**h t (a + v b)  or  b = b - v**h t**h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1_${ik}$, mp ), ldv,work, ldb )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', l, n, m-l, cone, v, ldv,b, ldb,cone, work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', k-l, n, m, cone, v( kp, 1_${ik}$ ), ldv,b, ldb, czero, work( &
                        kp, 1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'C', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb )
                        
              call stdlib${ii}$_cgemm( 'C', 'N', l, n, k-l, -cone, v( kp, mp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, cone, b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1_${ik}$, mp ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w**h t w            or  h**h = i - w**h t**h w
              ! a = a - (a + b v**h) t      or  a = a - (a + b v**h) t**h
              ! b = b - (a + b v**h) t v    or  b = b - (a + b v**h) t**h v
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'N', 'C', m, l, n-l, cone, b, ldb, v, ldv,cone, work, ldwork )
                        
              call stdlib${ii}$_cgemm( 'N', 'C', m, k-l, n, cone, b, ldb,v( kp, 1_${ik}$ ), ldv, czero, work( &
                        1_${ik}$, kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', m, l, k-l, -cone, work( 1_${ik}$, kp ), ldwork,v( kp, np ), &
                        ldv, cone, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_ctrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**h c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w**h t w          or  h**h = i - w**h t**h w
              ! a = a -     t (a + v b)  or  a = a -     t**h (a + v b)
              ! b = b - v**h t (a + v b)  or  b = b - v**h t**h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', l, n, cone, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_cgemm( 'N', 'N', l, n, m-l, cone, v( kp, mp ), ldv,b( mp, 1_${ik}$ ), ldb, &
                        cone, work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_cgemm( 'N', 'N', k-l, n, m, cone, v, ldv, b, ldb,czero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'L ', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'C', 'N', m-l, n, k, -cone, v( 1_${ik}$, mp ), ldv,work, ldwork, cone, &
                        b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_cgemm( 'C', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb )
                        
              call stdlib${ii}$_ctrmm( 'L', 'U', 'C', 'N', l, n, cone, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**h  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w**h t w            or  h**h = i - w**h t**h w
              ! a = a - (a + b v**h) t      or  a = a - (a + b v**h) t**h
              ! b = b - (a + b v**h) t v    or  b = b - (a + b v**h) t**h v
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'U', 'C', 'N', m, l, cone, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_cgemm( 'N', 'C', m, l, n-l, cone, b( 1_${ik}$, np ), ldb,v( kp, np ), ldv, &
                        cone, work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_cgemm( 'N', 'C', m, k-l, n, cone, b, ldb, v, ldv,czero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_cgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v( 1_${ik}$, np ), ldv, cone, &
                        b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_cgemm( 'N', 'N', m, l, k-l , -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_ctrmm( 'R', 'U', 'N', 'N', m, l, cone, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
           end if
           return
     end subroutine stdlib${ii}$_ctprfb

     pure module subroutine stdlib${ii}$_ztprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, &
     !! ZTPRFB applies a complex "triangular-pentagonal" block reflector H or its
     !! conjugate transpose H**H to a complex matrix C, which is composed of two
     !! blocks A and B, either from the left or right.
               lda, b, ldb, work, ldwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: direct, side, storev, trans
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(in) :: t(ldt,*), v(ldv,*)
           complex(dp), intent(out) :: work(ldwork,*)
        ! ==========================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return
           if( stdlib_lsame( storev, 'C' ) ) then
              column = .true.
              row = .false.
           else if ( stdlib_lsame( storev, 'R' ) ) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if( stdlib_lsame( side, 'L' ) ) then
              left = .true.
              right = .false.
           else if( stdlib_lsame( side, 'R' ) ) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if( stdlib_lsame( direct, 'F' ) ) then
              forward = .true.
              backward = .false.
           else if( stdlib_lsame( direct, 'B' ) ) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if( column .and. forward .and. left  ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h**h c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a -   t (a + v**h b)  or  a = a -   t**h (a + v**h b)
              ! b = b - v t (a + v**h b)  or  b = b - v t**h (a + v**h b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'U', 'C', 'N', l, n, cone, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'C', 'N', l, n, m-l, cone, v, ldv, b, ldb,cone, work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'C', 'N', k-l, n, m, cone, v( 1_${ik}$, kp ), ldv,b, ldb, czero, work( &
                        kp, 1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'N', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', l, n, k-l, -cone, v( mp, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, cone, b( mp, 1_${ik}$ ),  ldb )
              call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', l, n, cone, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h**h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**h
              ! b = b - (a + b v) t v**h  or  b = b - (a + b v) t**h v**h
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'U', 'N', 'N', m, l, cone, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', m, l, n-l, cone, b, ldb,v, ldv, cone, work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v( 1_${ik}$, kp ), ldv, czero, work( &
                        1_${ik}$, kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_zgemm( 'N', 'C', m, l, k-l, -cone, work( 1_${ik}$, kp ), ldwork,v( np, kp ), &
                        ldv, cone, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_ztrmm( 'R', 'U', 'C', 'N', m, l, cone, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h**h c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a -   t (a + v**h b)  or  a = a -   t**h (a + v**h b)
              ! b = b - v t (a + v**h b)  or  b = b - v t**h (a + v**h b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_zgemm( 'C', 'N', l, n, m-l, cone, v( mp, kp ), ldv,b( mp, 1_${ik}$ ), ldb, &
                        cone, work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_zgemm( 'C', 'N', k-l, n, m, cone, v, ldv,b, ldb, czero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'L', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'N', 'N', m-l, n, k, -cone, v( mp, 1_${ik}$ ), ldv,work, ldwork, cone, &
                        b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_zgemm( 'N', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b,  ldb )
                        
              call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h**h  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**h
              ! b = b - (a + b v) t v**h  or  b = b - (a + b v) t**h v**h
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_zgemm( 'N', 'N', m, l, n-l, cone, b( 1_${ik}$, np ), ldb,v( np, kp ), ldv, &
                        cone, work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_zgemm( 'N', 'N', m, k-l, n, cone, b, ldb,v, ldv, czero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v( np, 1_${ik}$ ), ldv, cone, &
                        b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_zgemm( 'N', 'C', m, l, k-l, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_ztrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**h c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w**h t w          or  h**h = i - w**h t**h w
              ! a = a -     t (a + v b)  or  a = a -     t**h (a + v b)
              ! b = b - v**h t (a + v b)  or  b = b - v**h t**h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1_${ik}$, mp ), ldv,work, ldb )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', l, n, m-l, cone, v, ldv,b, ldb,cone, work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', k-l, n, m, cone, v( kp, 1_${ik}$ ), ldv,b, ldb, czero, work( &
                        kp, 1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'C', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb )
                        
              call stdlib${ii}$_zgemm( 'C', 'N', l, n, k-l, -cone, v( kp, mp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, cone, b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1_${ik}$, mp ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w**h t w            or  h**h = i - w**h t**h w
              ! a = a - (a + b v**h) t      or  a = a - (a + b v**h) t**h
              ! b = b - (a + b v**h) t v    or  b = b - (a + b v**h) t**h v
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'N', 'C', m, l, n-l, cone, b, ldb, v, ldv,cone, work, ldwork )
                        
              call stdlib${ii}$_zgemm( 'N', 'C', m, k-l, n, cone, b, ldb,v( kp, 1_${ik}$ ), ldv, czero, work( &
                        1_${ik}$, kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', m, l, k-l, -cone, work( 1_${ik}$, kp ), ldwork,v( kp, np ), &
                        ldv, cone, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_ztrmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**h c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w**h t w          or  h**h = i - w**h t**h w
              ! a = a -     t (a + v b)  or  a = a -     t**h (a + v b)
              ! b = b - v**h t (a + v b)  or  b = b - v**h t**h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', l, n, cone, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_zgemm( 'N', 'N', l, n, m-l, cone, v( kp, mp ), ldv,b( mp, 1_${ik}$ ), ldb, &
                        cone, work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_zgemm( 'N', 'N', k-l, n, m, cone, v, ldv, b, ldb,czero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'L ', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'C', 'N', m-l, n, k, -cone, v( 1_${ik}$, mp ), ldv,work, ldwork, cone, &
                        b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_zgemm( 'C', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb )
                        
              call stdlib${ii}$_ztrmm( 'L', 'U', 'C', 'N', l, n, cone, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**h  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w**h t w            or  h**h = i - w**h t**h w
              ! a = a - (a + b v**h) t      or  a = a - (a + b v**h) t**h
              ! b = b - (a + b v**h) t v    or  b = b - (a + b v**h) t**h v
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'U', 'C', 'N', m, l, cone, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_zgemm( 'N', 'C', m, l, n-l, cone, b( 1_${ik}$, np ), ldb,v( kp, np ), ldv, &
                        cone, work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_zgemm( 'N', 'C', m, k-l, n, cone, b, ldb, v, ldv,czero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_zgemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v( 1_${ik}$, np ), ldv, cone, &
                        b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_zgemm( 'N', 'N', m, l, k-l , -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_ztrmm( 'R', 'U', 'N', 'N', m, l, cone, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
           end if
           return
     end subroutine stdlib${ii}$_ztprfb

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tprfb( side, trans, direct, storev, m, n, k, l,v, ldv, t, ldt, a, &
     !! ZTPRFB: applies a complex "triangular-pentagonal" block reflector H or its
     !! conjugate transpose H**H to a complex matrix C, which is composed of two
     !! blocks A and B, either from the left or right.
               lda, b, ldb, work, ldwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: direct, side, storev, trans
           integer(${ik}$), intent(in) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(in) :: t(ldt,*), v(ldv,*)
           complex(${ck}$), intent(out) :: work(ldwork,*)
        ! ==========================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( m<=0 .or. n<=0 .or. k<=0 .or. l<0 ) return
           if( stdlib_lsame( storev, 'C' ) ) then
              column = .true.
              row = .false.
           else if ( stdlib_lsame( storev, 'R' ) ) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if( stdlib_lsame( side, 'L' ) ) then
              left = .true.
              right = .false.
           else if( stdlib_lsame( side, 'R' ) ) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if( stdlib_lsame( direct, 'F' ) ) then
              forward = .true.
              backward = .false.
           else if( stdlib_lsame( direct, 'B' ) ) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if( column .and. forward .and. left  ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h**h c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a -   t (a + v**h b)  or  a = a -   t**h (a + v**h b)
              ! b = b - v t (a + v**h b)  or  b = b - v t**h (a + v**h b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'C', 'N', l, n, cone, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', l, n, m-l, cone, v, ldv, b, ldb,cone, work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', k-l, n, m, cone, v( 1_${ik}$, kp ), ldv,b, ldb, czero, work( &
                        kp, 1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', l, n, k-l, -cone, v( mp, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, cone, b( mp, 1_${ik}$ ),  ldb )
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', l, n, cone, v( mp, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h**h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**h
              ! b = b - (a + b v) t v**h  or  b = b - (a + b v) t**h v**h
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'N', 'N', m, l, cone, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, l, n-l, cone, b, ldb,v, ldv, cone, work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, k-l, n, cone, b, ldb,v( 1_${ik}$, kp ), ldv, czero, work( &
                        1_${ik}$, kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, l, k-l, -cone, work( 1_${ik}$, kp ), ldwork,v( np, kp ), &
                        ldv, cone, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'C', 'N', m, l, cone, v( np, 1_${ik}$ ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h**h c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a -   t (a + v**h b)  or  a = a -   t**h (a + v**h b)
              ! b = b - v t (a + v**h b)  or  b = b - v t**h (a + v**h b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', l, n, m-l, cone, v( mp, kp ), ldv,b( mp, 1_${ik}$ ), ldb, &
                        cone, work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', k-l, n, m, cone, v, ldv,b, ldb, czero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m-l, n, k, -cone, v( mp, 1_${ik}$ ), ldv,work, ldwork, cone, &
                        b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b,  ldb )
                        
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1_${ik}$, kp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( column .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h**h  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w**h          or  h**h = i - w t**h w**h
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**h
              ! b = b - (a + b v) t v**h  or  b = b - (a + b v) t**h v**h
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, l, n-l, cone, b( 1_${ik}$, np ), ldb,v( np, kp ), ldv, &
                        cone, work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, k-l, n, cone, b, ldb,v, ldv, czero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, n-l, k, -cone, work, ldwork,v( np, 1_${ik}$ ), ldv, cone, &
                        b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, l, k-l, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1_${ik}$, kp ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**h c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w**h t w          or  h**h = i - w**h t**h w
              ! a = a -     t (a + v b)  or  a = a -     t**h (a + v b)
              ! b = b - v**h t (a + v b)  or  b = b - v**h t**h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( m-l+1, m )
              kp = min( l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( i, j ) = b( m-l+i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'N', l, n, cone, v( 1_${ik}$, mp ), ldv,work, ldb )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', l, n, m-l, cone, v, ldv,b, ldb,cone, work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', k-l, n, m, cone, v( kp, 1_${ik}$ ), ldv,b, ldb, czero, work( &
                        kp, 1_${ik}$ ), ldwork )
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', m-l, n, k, -cone, v, ldv, work, ldwork,cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', l, n, k-l, -cone, v( kp, mp ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork, cone, b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'N', l, n, cone, v( 1_${ik}$, mp ), ldv,work, ldwork )
                        
              do j = 1, n
                 do i = 1, l
                    b( m-l+i, j ) = b( m-l+i, j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. forward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**h  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w**h t w            or  h**h = i - w**h t**h w
              ! a = a - (a + b v**h) t      or  a = a - (a + b v**h) t**h
              ! b = b - (a + b v**h) t v    or  b = b - (a + b v**h) t**h v
       ! ---------------------------------------------------------------------------
              np = min( n-l+1, n )
              kp = min( l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, j ) = b( i, n-l+j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', 'C', 'N', m, l, cone, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, l, n-l, cone, b, ldb, v, ldv,cone, work, ldwork )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, k-l, n, cone, b, ldb,v( kp, 1_${ik}$ ), ldv, czero, work( &
                        1_${ik}$, kp ), ldwork )
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, l, k-l, -cone, work( 1_${ik}$, kp ), ldwork,v( kp, np ), &
                        ldv, cone, b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', 'N', 'N', m, l, cone, v( 1_${ik}$, np ), ldv,work, ldwork )
                        
              do j = 1, l
                 do i = 1, m
                    b( i, n-l+j ) = b( i, n-l+j ) - work( i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. left ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**h c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w**h t w          or  h**h = i - w**h t**h w
              ! a = a -     t (a + v b)  or  a = a -     t**h (a + v b)
              ! b = b - v**h t (a + v b)  or  b = b - v**h t**h (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min( l+1, m )
              kp = min( k-l+1, k )
              do j = 1, n
                 do i = 1, l
                    work( k-l+i, j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', l, n, cone, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', l, n, m-l, cone, v( kp, mp ), ldv,b( mp, 1_${ik}$ ), ldb, &
                        cone, work( kp, 1_${ik}$ ), ldwork )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', k-l, n, m, cone, v, ldv, b, ldb,czero, work, ldwork )
                        
              do j = 1, n
                 do i = 1, k
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'L ', trans, 'N', k, n, cone, t, ldt,work, ldwork )
              do j = 1, n
                 do i = 1, k
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', m-l, n, k, -cone, v( 1_${ik}$, mp ), ldv,work, ldwork, cone, &
                        b( mp, 1_${ik}$ ), ldb )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', l, n, k-l, -cone, v, ldv,work, ldwork, cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'C', 'N', l, n, cone, v( kp, 1_${ik}$ ), ldv,work( kp, 1_${ik}$ ), &
                        ldwork )
              do j = 1, n
                 do i = 1, l
                    b( i, j ) = b( i, j ) - work( k-l+i, j )
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if( row .and. backward .and. right ) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**h  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w**h t w            or  h**h = i - w**h t**h w
              ! a = a - (a + b v**h) t      or  a = a - (a + b v**h) t**h
              ! b = b - (a + b v**h) t v    or  b = b - (a + b v**h) t**h v
       ! ---------------------------------------------------------------------------
              np = min( l+1, n )
              kp = min( k-l+1, k )
              do j = 1, l
                 do i = 1, m
                    work( i, k-l+j ) = b( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'C', 'N', m, l, cone, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, l, n-l, cone, b( 1_${ik}$, np ), ldb,v( kp, np ), ldv, &
                        cone, work( 1_${ik}$, kp ), ldwork )
              call stdlib${ii}$_${ci}$gemm( 'N', 'C', m, k-l, n, cone, b, ldb, v, ldv,czero, work, ldwork )
                        
              do j = 1, k
                 do i = 1, m
                    work( i, j ) = work( i, j ) + a( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', trans, 'N', m, k, cone, t, ldt,work, ldwork )
              do j = 1, k
                 do i = 1, m
                    a( i, j ) = a( i, j ) - work( i, j )
                 end do
              end do
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n-l, k, -cone, work, ldwork,v( 1_${ik}$, np ), ldv, cone, &
                        b( 1_${ik}$, np ), ldb )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, l, k-l , -cone, work, ldwork,v, ldv, cone, b, ldb )
                        
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'N', 'N', m, l, cone, v( kp, 1_${ik}$ ), ldv,work( 1_${ik}$, kp ), &
                        ldwork )
              do j = 1, l
                 do i = 1, m
                    b( i, j ) = b( i, j ) - work( i, k-l+j )
                 end do
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ci}$tprfb

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! SGGQRF computes a generalized QR factorization of an N-by-M matrix A
     !! and an N-by-P matrix B:
     !! A = Q*R,        B = Q*T*Z,
     !! where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
     !! matrix, and R and T assume one of the forms:
     !! if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
     !! (  0  ) N-M                         N   M-N
     !! M
     !! where R11 is upper triangular, and
     !! if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
     !! P-N  N                           ( T21 ) P
     !! P
     !! where T12 or T21 is upper triangular.
     !! In particular, if B is square and nonsingular, the GQR factorization
     !! of A and B implicitly gives the QR factorization of inv(B)*A:
     !! inv(B)*A = Z**T*(inv(T)*R)
     !! where inv(B) denotes the inverse of the matrix B, and Z**T denotes the
     !! transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', n, m, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGERQF', ' ', n, p, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', ' ', n, m, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( p<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, n, m, p ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGGQRF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! qr factorization of n-by-m matrix a: a = q*r
           call stdlib${ii}$_sgeqrf( n, m, a, lda, taua, work, lwork, info )
           lopt = work( 1_${ik}$ )
           ! update b := q**t*b.
           call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', n, p, min( n, m ), a, lda, taua,b, ldb, work, &
                     lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! rq factorization of n-by-p matrix b: b = t*z.
           call stdlib${ii}$_sgerqf( n, p, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_sggqrf

     pure module subroutine stdlib${ii}$_dggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! DGGQRF computes a generalized QR factorization of an N-by-M matrix A
     !! and an N-by-P matrix B:
     !! A = Q*R,        B = Q*T*Z,
     !! where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
     !! matrix, and R and T assume one of the forms:
     !! if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
     !! (  0  ) N-M                         N   M-N
     !! M
     !! where R11 is upper triangular, and
     !! if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
     !! P-N  N                           ( T21 ) P
     !! P
     !! where T12 or T21 is upper triangular.
     !! In particular, if B is square and nonsingular, the GQR factorization
     !! of A and B implicitly gives the QR factorization of inv(B)*A:
     !! inv(B)*A = Z**T*(inv(T)*R)
     !! where inv(B) denotes the inverse of the matrix B, and Z**T denotes the
     !! transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', n, m, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', n, p, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', ' ', n, m, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( p<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, n, m, p ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGGQRF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! qr factorization of n-by-m matrix a: a = q*r
           call stdlib${ii}$_dgeqrf( n, m, a, lda, taua, work, lwork, info )
           lopt = work( 1_${ik}$ )
           ! update b := q**t*b.
           call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', n, p, min( n, m ), a, lda, taua,b, ldb, work, &
                     lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! rq factorization of n-by-p matrix b: b = t*z.
           call stdlib${ii}$_dgerqf( n, p, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_dggqrf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! DGGQRF: computes a generalized QR factorization of an N-by-M matrix A
     !! and an N-by-P matrix B:
     !! A = Q*R,        B = Q*T*Z,
     !! where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
     !! matrix, and R and T assume one of the forms:
     !! if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
     !! (  0  ) N-M                         N   M-N
     !! M
     !! where R11 is upper triangular, and
     !! if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
     !! P-N  N                           ( T21 ) P
     !! P
     !! where T12 or T21 is upper triangular.
     !! In particular, if B is square and nonsingular, the GQR factorization
     !! of A and B implicitly gives the QR factorization of inv(B)*A:
     !! inv(B)*A = Z**T*(inv(T)*R)
     !! where inv(B) denotes the inverse of the matrix B, and Z**T denotes the
     !! transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', n, m, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', n, p, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', ' ', n, m, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( p<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, n, m, p ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGGQRF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! qr factorization of n-by-m matrix a: a = q*r
           call stdlib${ii}$_${ri}$geqrf( n, m, a, lda, taua, work, lwork, info )
           lopt = work( 1_${ik}$ )
           ! update b := q**t*b.
           call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', n, p, min( n, m ), a, lda, taua,b, ldb, work, &
                     lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! rq factorization of n-by-p matrix b: b = t*z.
           call stdlib${ii}$_${ri}$gerqf( n, p, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_${ri}$ggqrf

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! CGGQRF computes a generalized QR factorization of an N-by-M matrix A
     !! and an N-by-P matrix B:
     !! A = Q*R,        B = Q*T*Z,
     !! where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
     !! and R and T assume one of the forms:
     !! if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
     !! (  0  ) N-M                         N   M-N
     !! M
     !! where R11 is upper triangular, and
     !! if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
     !! P-N  N                           ( T21 ) P
     !! P
     !! where T12 or T21 is upper triangular.
     !! In particular, if B is square and nonsingular, the GQR factorization
     !! of A and B implicitly gives the QR factorization of inv(B)*A:
     !! inv(B)*A = Z**H * (inv(T)*R)
     !! where inv(B) denotes the inverse of the matrix B, and Z' denotes the
     !! conjugate transpose of matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', n, m, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGERQF', ' ', n, p, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', ' ', n, m, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p)*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( p<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, n, m, p ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGGQRF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! qr factorization of n-by-m matrix a: a = q*r
           call stdlib${ii}$_cgeqrf( n, m, a, lda, taua, work, lwork, info )
           lopt = real( work( 1_${ik}$ ),KIND=sp)
           ! update b := q**h*b.
           call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', n, p, min( n, m ), a,lda, taua, b, &
                     ldb, work, lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! rq factorization of n-by-p matrix b: b = t*z.
           call stdlib${ii}$_cgerqf( n, p, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_cggqrf

     pure module subroutine stdlib${ii}$_zggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! ZGGQRF computes a generalized QR factorization of an N-by-M matrix A
     !! and an N-by-P matrix B:
     !! A = Q*R,        B = Q*T*Z,
     !! where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
     !! and R and T assume one of the forms:
     !! if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
     !! (  0  ) N-M                         N   M-N
     !! M
     !! where R11 is upper triangular, and
     !! if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
     !! P-N  N                           ( T21 ) P
     !! P
     !! where T12 or T21 is upper triangular.
     !! In particular, if B is square and nonsingular, the GQR factorization
     !! of A and B implicitly gives the QR factorization of inv(B)*A:
     !! inv(B)*A = Z**H * (inv(T)*R)
     !! where inv(B) denotes the inverse of the matrix B, and Z**H denotes the
     !! conjugate transpose of matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', n, m, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', n, p, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', ' ', n, m, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( p<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, n, m, p ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGGQRF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! qr factorization of n-by-m matrix a: a = q*r
           call stdlib${ii}$_zgeqrf( n, m, a, lda, taua, work, lwork, info )
           lopt = real( work( 1_${ik}$ ),KIND=dp)
           ! update b := q**h*b.
           call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', n, p, min( n, m ), a,lda, taua, b, &
                     ldb, work, lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! rq factorization of n-by-p matrix b: b = t*z.
           call stdlib${ii}$_zgerqf( n, p, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_zggqrf

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ggqrf( n, m, p, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! ZGGQRF: computes a generalized QR factorization of an N-by-M matrix A
     !! and an N-by-P matrix B:
     !! A = Q*R,        B = Q*T*Z,
     !! where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
     !! and R and T assume one of the forms:
     !! if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
     !! (  0  ) N-M                         N   M-N
     !! M
     !! where R11 is upper triangular, and
     !! if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
     !! P-N  N                           ( T21 ) P
     !! P
     !! where T12 or T21 is upper triangular.
     !! In particular, if B is square and nonsingular, the GQR factorization
     !! of A and B implicitly gives the QR factorization of inv(B)*A:
     !! inv(B)*A = Z**H * (inv(T)*R)
     !! where inv(B) denotes the inverse of the matrix B, and Z**H denotes the
     !! conjugate transpose of matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', n, m, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', n, p, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', ' ', n, m, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( p<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, n, m, p ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGGQRF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! qr factorization of n-by-m matrix a: a = q*r
           call stdlib${ii}$_${ci}$geqrf( n, m, a, lda, taua, work, lwork, info )
           lopt = real( work( 1_${ik}$ ),KIND=${ck}$)
           ! update b := q**h*b.
           call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', n, p, min( n, m ), a,lda, taua, b, &
                     ldb, work, lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! rq factorization of n-by-p matrix b: b = t*z.
           call stdlib${ii}$_${ci}$gerqf( n, p, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_${ci}$ggqrf

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgerqf( m, n, a, lda, tau, work, lwork, info )
     !! SGERQF computes an RQ factorization of a real M-by-N matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, &
                     nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              k = min( m, n )
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if ( .not.lquery ) then
                 if( lwork<=0_${ik}$ .or. ( n>0_${ik}$ .and. lwork<max( 1_${ik}$, m ) ) )info = -7_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGERQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 1_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SGERQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially.
              ! the last kk rows are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              do i = k - kk + ki + 1, k - kk + 1, -nb
                 ib = min( k-i+1, nb )
                 ! compute the rq factorization of the current block
                 ! a(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
                 call stdlib${ii}$_sgerq2( ib, n-k+i+ib-1, a( m-k+i, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 if( m-k+i>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_slarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_slarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-&
                    k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork )
                              
                 end if
              end do
              mu = m - k + i + nb - 1_${ik}$
              nu = n - k + i + nb - 1_${ik}$
           else
              mu = m
              nu = n
           end if
           ! use unblocked code to factor the last or only block
           if( mu>0_${ik}$ .and. nu>0_${ik}$ )call stdlib${ii}$_sgerq2( mu, nu, a, lda, tau, work, iinfo )
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgerqf

     pure module subroutine stdlib${ii}$_dgerqf( m, n, a, lda, tau, work, lwork, info )
     !! DGERQF computes an RQ factorization of a real M-by-N matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, &
                     nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              k = min( m, n )
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if ( .not.lquery ) then
                 if( lwork<=0_${ik}$ .or. ( n>0_${ik}$ .and. lwork<max( 1_${ik}$, m ) ) )info = -7_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGERQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 1_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially.
              ! the last kk rows are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              do i = k - kk + ki + 1, k - kk + 1, -nb
                 ib = min( k-i+1, nb )
                 ! compute the rq factorization of the current block
                 ! a(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
                 call stdlib${ii}$_dgerq2( ib, n-k+i+ib-1, a( m-k+i, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 if( m-k+i>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_dlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_dlarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-&
                    k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork )
                              
                 end if
              end do
              mu = m - k + i + nb - 1_${ik}$
              nu = n - k + i + nb - 1_${ik}$
           else
              mu = m
              nu = n
           end if
           ! use unblocked code to factor the last or only block
           if( mu>0_${ik}$ .and. nu>0_${ik}$ )call stdlib${ii}$_dgerq2( mu, nu, a, lda, tau, work, iinfo )
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgerqf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gerqf( m, n, a, lda, tau, work, lwork, info )
     !! DGERQF: computes an RQ factorization of a real M-by-N matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, &
                     nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              k = min( m, n )
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if ( .not.lquery ) then
                 if( lwork<=0_${ik}$ .or. ( n>0_${ik}$ .and. lwork<max( 1_${ik}$, m ) ) )info = -7_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGERQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 1_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially.
              ! the last kk rows are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              do i = k - kk + ki + 1, k - kk + 1, -nb
                 ib = min( k-i+1, nb )
                 ! compute the rq factorization of the current block
                 ! a(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
                 call stdlib${ii}$_${ri}$gerq2( ib, n-k+i+ib-1, a( m-k+i, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 if( m-k+i>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_${ri}$larft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_${ri}$larfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-&
                    k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork )
                              
                 end if
              end do
              mu = m - k + i + nb - 1_${ik}$
              nu = n - k + i + nb - 1_${ik}$
           else
              mu = m
              nu = n
           end if
           ! use unblocked code to factor the last or only block
           if( mu>0_${ik}$ .and. nu>0_${ik}$ )call stdlib${ii}$_${ri}$gerq2( mu, nu, a, lda, tau, work, iinfo )
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$gerqf

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgerqf( m, n, a, lda, tau, work, lwork, info )
     !! CGERQF computes an RQ factorization of a complex M-by-N matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, &
                     nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              k = min( m, n )
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if ( .not.lquery ) then
                 if( lwork<=0_${ik}$ .or. ( n>0_${ik}$ .and. lwork<max( 1_${ik}$, m ) ) )info = -7_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGERQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 1_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CGERQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially.
              ! the last kk rows are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              do i = k - kk + ki + 1, k - kk + 1, -nb
                 ib = min( k-i+1, nb )
                 ! compute the rq factorization of the current block
                 ! a(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
                 call stdlib${ii}$_cgerq2( ib, n-k+i+ib-1, a( m-k+i, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 if( m-k+i>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_clarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_clarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-&
                    k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork )
                              
                 end if
              end do
              mu = m - k + i + nb - 1_${ik}$
              nu = n - k + i + nb - 1_${ik}$
           else
              mu = m
              nu = n
           end if
           ! use unblocked code to factor the last or only block
           if( mu>0_${ik}$ .and. nu>0_${ik}$ )call stdlib${ii}$_cgerq2( mu, nu, a, lda, tau, work, iinfo )
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cgerqf

     pure module subroutine stdlib${ii}$_zgerqf( m, n, a, lda, tau, work, lwork, info )
     !! ZGERQF computes an RQ factorization of a complex M-by-N matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, &
                     nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              k = min( m, n )
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if ( .not.lquery ) then
                 if( lwork<=0_${ik}$ .or. ( n>0_${ik}$ .and. lwork<max( 1_${ik}$, m ) ) )info = -7_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGERQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 1_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially.
              ! the last kk rows are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              do i = k - kk + ki + 1, k - kk + 1, -nb
                 ib = min( k-i+1, nb )
                 ! compute the rq factorization of the current block
                 ! a(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
                 call stdlib${ii}$_zgerq2( ib, n-k+i+ib-1, a( m-k+i, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 if( m-k+i>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_zlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_zlarfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-&
                    k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork )
                              
                 end if
              end do
              mu = m - k + i + nb - 1_${ik}$
              nu = n - k + i + nb - 1_${ik}$
           else
              mu = m
              nu = n
           end if
           ! use unblocked code to factor the last or only block
           if( mu>0_${ik}$ .and. nu>0_${ik}$ )call stdlib${ii}$_zgerq2( mu, nu, a, lda, tau, work, iinfo )
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zgerqf

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gerqf( m, n, a, lda, tau, work, lwork, info )
     !! ZGERQF: computes an RQ factorization of a complex M-by-N matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ki, kk, ldwork, lwkopt, mu, nb, nbmin, nu, &
                     nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              k = min( m, n )
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if ( .not.lquery ) then
                 if( lwork<=0_${ik}$ .or. ( n>0_${ik}$ .and. lwork<max( 1_${ik}$, m ) ) )info = -7_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGERQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 1_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially.
              ! the last kk rows are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              do i = k - kk + ki + 1, k - kk + 1, -nb
                 ib = min( k-i+1, nb )
                 ! compute the rq factorization of the current block
                 ! a(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
                 call stdlib${ii}$_${ci}$gerq2( ib, n-k+i+ib-1, a( m-k+i, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 if( m-k+i>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_${ci}$larft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_${ci}$larfb( 'RIGHT', 'NO TRANSPOSE', 'BACKWARD','ROWWISE', m-k+i-1, n-&
                    k+i+ib-1, ib,a( m-k+i, 1_${ik}$ ), lda, work, ldwork, a, lda,work( ib+1 ), ldwork )
                              
                 end if
              end do
              mu = m - k + i + nb - 1_${ik}$
              nu = n - k + i + nb - 1_${ik}$
           else
              mu = m
              nu = n
           end if
           ! use unblocked code to factor the last or only block
           if( mu>0_${ik}$ .and. nu>0_${ik}$ )call stdlib${ii}$_${ci}$gerq2( mu, nu, a, lda, tau, work, iinfo )
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$gerqf

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgerq2( m, n, a, lda, tau, work, info )
     !! SGERQ2 computes an RQ factorization of a real m by n matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGERQ2', -info )
              return
           end if
           k = min( m, n )
           do i = k, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! a(m-k+i,1:n-k+i-1)
              call stdlib${ii}$_slarfg( n-k+i, a( m-k+i, n-k+i ), a( m-k+i, 1_${ik}$ ), lda,tau( i ) )
              ! apply h(i) to a(1:m-k+i-1,1:n-k+i) from the right
              aii = a( m-k+i, n-k+i )
              a( m-k+i, n-k+i ) = one
              call stdlib${ii}$_slarf( 'RIGHT', m-k+i-1, n-k+i, a( m-k+i, 1_${ik}$ ), lda,tau( i ), a, lda, &
                        work )
              a( m-k+i, n-k+i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_sgerq2

     pure module subroutine stdlib${ii}$_dgerq2( m, n, a, lda, tau, work, info )
     !! DGERQ2 computes an RQ factorization of a real m by n matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGERQ2', -info )
              return
           end if
           k = min( m, n )
           do i = k, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! a(m-k+i,1:n-k+i-1)
              call stdlib${ii}$_dlarfg( n-k+i, a( m-k+i, n-k+i ), a( m-k+i, 1_${ik}$ ), lda,tau( i ) )
              ! apply h(i) to a(1:m-k+i-1,1:n-k+i) from the right
              aii = a( m-k+i, n-k+i )
              a( m-k+i, n-k+i ) = one
              call stdlib${ii}$_dlarf( 'RIGHT', m-k+i-1, n-k+i, a( m-k+i, 1_${ik}$ ), lda,tau( i ), a, lda, &
                        work )
              a( m-k+i, n-k+i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_dgerq2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gerq2( m, n, a, lda, tau, work, info )
     !! DGERQ2: computes an RQ factorization of a real m by n matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGERQ2', -info )
              return
           end if
           k = min( m, n )
           do i = k, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! a(m-k+i,1:n-k+i-1)
              call stdlib${ii}$_${ri}$larfg( n-k+i, a( m-k+i, n-k+i ), a( m-k+i, 1_${ik}$ ), lda,tau( i ) )
              ! apply h(i) to a(1:m-k+i-1,1:n-k+i) from the right
              aii = a( m-k+i, n-k+i )
              a( m-k+i, n-k+i ) = one
              call stdlib${ii}$_${ri}$larf( 'RIGHT', m-k+i-1, n-k+i, a( m-k+i, 1_${ik}$ ), lda,tau( i ), a, lda, &
                        work )
              a( m-k+i, n-k+i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_${ri}$gerq2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgerq2( m, n, a, lda, tau, work, info )
     !! CGERQ2 computes an RQ factorization of a complex m by n matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGERQ2', -info )
              return
           end if
           k = min( m, n )
           do i = k, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! a(m-k+i,1:n-k+i-1)
              call stdlib${ii}$_clacgv( n-k+i, a( m-k+i, 1_${ik}$ ), lda )
              alpha = a( m-k+i, n-k+i )
              call stdlib${ii}$_clarfg( n-k+i, alpha, a( m-k+i, 1_${ik}$ ), lda,tau( i ) )
              ! apply h(i) to a(1:m-k+i-1,1:n-k+i) from the right
              a( m-k+i, n-k+i ) = cone
              call stdlib${ii}$_clarf( 'RIGHT', m-k+i-1, n-k+i, a( m-k+i, 1_${ik}$ ), lda,tau( i ), a, lda, &
                        work )
              a( m-k+i, n-k+i ) = alpha
              call stdlib${ii}$_clacgv( n-k+i-1, a( m-k+i, 1_${ik}$ ), lda )
           end do
           return
     end subroutine stdlib${ii}$_cgerq2

     pure module subroutine stdlib${ii}$_zgerq2( m, n, a, lda, tau, work, info )
     !! ZGERQ2 computes an RQ factorization of a complex m by n matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGERQ2', -info )
              return
           end if
           k = min( m, n )
           do i = k, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! a(m-k+i,1:n-k+i-1)
              call stdlib${ii}$_zlacgv( n-k+i, a( m-k+i, 1_${ik}$ ), lda )
              alpha = a( m-k+i, n-k+i )
              call stdlib${ii}$_zlarfg( n-k+i, alpha, a( m-k+i, 1_${ik}$ ), lda, tau( i ) )
              ! apply h(i) to a(1:m-k+i-1,1:n-k+i) from the right
              a( m-k+i, n-k+i ) = cone
              call stdlib${ii}$_zlarf( 'RIGHT', m-k+i-1, n-k+i, a( m-k+i, 1_${ik}$ ), lda,tau( i ), a, lda, &
                        work )
              a( m-k+i, n-k+i ) = alpha
              call stdlib${ii}$_zlacgv( n-k+i-1, a( m-k+i, 1_${ik}$ ), lda )
           end do
           return
     end subroutine stdlib${ii}$_zgerq2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gerq2( m, n, a, lda, tau, work, info )
     !! ZGERQ2: computes an RQ factorization of a complex m by n matrix A:
     !! A = R * Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGERQ2', -info )
              return
           end if
           k = min( m, n )
           do i = k, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! a(m-k+i,1:n-k+i-1)
              call stdlib${ii}$_${ci}$lacgv( n-k+i, a( m-k+i, 1_${ik}$ ), lda )
              alpha = a( m-k+i, n-k+i )
              call stdlib${ii}$_${ci}$larfg( n-k+i, alpha, a( m-k+i, 1_${ik}$ ), lda, tau( i ) )
              ! apply h(i) to a(1:m-k+i-1,1:n-k+i) from the right
              a( m-k+i, n-k+i ) = cone
              call stdlib${ii}$_${ci}$larf( 'RIGHT', m-k+i-1, n-k+i, a( m-k+i, 1_${ik}$ ), lda,tau( i ), a, lda, &
                        work )
              a( m-k+i, n-k+i ) = alpha
              call stdlib${ii}$_${ci}$lacgv( n-k+i-1, a( m-k+i, 1_${ik}$ ), lda )
           end do
           return
     end subroutine stdlib${ii}$_${ci}$gerq2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungrq( m, n, k, a, lda, tau, work, lwork, info )
     !! CUNGRQ generates an M-by-N complex matrix Q with orthonormal rows,
     !! which is defined as the last M rows of a product of K elementary
     !! reflectors of order N
     !! Q  =  H(1)**H H(2)**H . . . H(k)**H
     !! as returned by CGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNGRQ', ' ', m, n, k, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<max( 1_${ik}$, m ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m<=0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CUNGRQ', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CUNGRQ', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min( k, ( ( k-nx+nb-1 ) / nb )*nb )
              ! set a(1:m-kk,n-kk+1:n) to czero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the first or only block.
           call stdlib${ii}$_cungr2( m-kk, n-kk, k-kk, a, lda, tau, work, iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min( nb, k-i+1 )
                 ii = m - k + i
                 if( ii>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_clarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h**h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_clarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'BACKWARD','ROWWISE', ii-&
                    1_${ik}$, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ),lda, work, ldwork, a, lda, work( ib+1 ),ldwork )
                              
                 end if
                 ! apply h**h to columns 1:n-k+i+ib-1 of current block
                 call stdlib${ii}$_cungr2( ib, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 ! set columns n-k+i+ib:n of current block to czero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a( j, l ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cungrq

     pure module subroutine stdlib${ii}$_zungrq( m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGRQ generates an M-by-N complex matrix Q with orthonormal rows,
     !! which is defined as the last M rows of a product of K elementary
     !! reflectors of order N
     !! Q  =  H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNGRQ', ' ', m, n, k, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<max( 1_${ik}$, m ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m<=0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZUNGRQ', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNGRQ', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min( k, ( ( k-nx+nb-1 ) / nb )*nb )
              ! set a(1:m-kk,n-kk+1:n) to czero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the first or only block.
           call stdlib${ii}$_zungr2( m-kk, n-kk, k-kk, a, lda, tau, work, iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min( nb, k-i+1 )
                 ii = m - k + i
                 if( ii>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_zlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h**h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_zlarfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'BACKWARD','ROWWISE', ii-&
                    1_${ik}$, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ),lda, work, ldwork, a, lda, work( ib+1 ),ldwork )
                              
                 end if
                 ! apply h**h to columns 1:n-k+i+ib-1 of current block
                 call stdlib${ii}$_zungr2( ib, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 ! set columns n-k+i+ib:n of current block to czero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a( j, l ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zungrq

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungrq( m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGRQ: generates an M-by-N complex matrix Q with orthonormal rows,
     !! which is defined as the last M rows of a product of K elementary
     !! reflectors of order N
     !! Q  =  H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNGRQ', ' ', m, n, k, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<max( 1_${ik}$, m ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m<=0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZUNGRQ', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNGRQ', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min( k, ( ( k-nx+nb-1 ) / nb )*nb )
              ! set a(1:m-kk,n-kk+1:n) to czero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the first or only block.
           call stdlib${ii}$_${ci}$ungr2( m-kk, n-kk, k-kk, a, lda, tau, work, iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min( nb, k-i+1 )
                 ii = m - k + i
                 if( ii>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_${ci}$larft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h**h to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_${ci}$larfb( 'RIGHT', 'CONJUGATE TRANSPOSE', 'BACKWARD','ROWWISE', ii-&
                    1_${ik}$, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ),lda, work, ldwork, a, lda, work( ib+1 ),ldwork )
                              
                 end if
                 ! apply h**h to columns 1:n-k+i+ib-1 of current block
                 call stdlib${ii}$_${ci}$ungr2( ib, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 ! set columns n-k+i+ib:n of current block to czero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a( j, l ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$ungrq

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! CUNMRQ overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1)**H H(2)**H . . . H(k)**H
     !! as returned by CGERQF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(${ik}$) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              if( m==0_${ik}$ .or. n==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMRQ', side // trans, m, n,k, -1_${ik}$ ) )
                           
                 lwkopt = nw*nb + tsize
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNMRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CUNMRQ', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_cunmr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
              else
                 mi = m
              end if
              if( notran ) then
                 transt = 'C'
              else
                 transt = 'N'
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib${ii}$_clarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1_${ik}$ ), lda, tau( &
                           i ), work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1_${ik}$
                 else
                    ! h or h**h is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1_${ik}$
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_clarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1_${ik}$ ), &
                           lda, work( iwt ), ldt, c, ldc,work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_cunmrq

     pure module subroutine stdlib${ii}$_zunmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! ZUNMRQ overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(${ik}$) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              if( m==0_${ik}$ .or. n==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', side // trans, m, n,k, -1_${ik}$ ) )
                           
                 lwkopt = nw*nb + tsize
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNMRQ', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_zunmr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
              else
                 mi = m
              end if
              if( notran ) then
                 transt = 'C'
              else
                 transt = 'N'
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib${ii}$_zlarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1_${ik}$ ), lda, tau( &
                           i ), work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1_${ik}$
                 else
                    ! h or h**h is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1_${ik}$
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_zlarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1_${ik}$ ), &
                           lda, work( iwt ), ldt, c, ldc,work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_zunmrq

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unmrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! ZUNMRQ: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(${ik}$) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              if( m==0_${ik}$ .or. n==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', side // trans, m, n,k, -1_${ik}$ ) )
                           
                 lwkopt = nw*nb + tsize
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNMRQ', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_${ci}$unmr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
              else
                 mi = m
              end if
              if( notran ) then
                 transt = 'C'
              else
                 transt = 'N'
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib${ii}$_${ci}$larft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1_${ik}$ ), lda, tau( &
                           i ), work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1_${ik}$
                 else
                    ! h or h**h is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1_${ik}$
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_${ci}$larfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1_${ik}$ ), &
                           lda, work( iwt ), ldt, c, ldc,work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ci}$unmrq

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunmr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! CUNMR2 overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1)**H H(2)**H . . . H(k)**H
     !! as returned by CGERQF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, mi, ni, nq
           complex(sp) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNMR2', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = conjg( tau( i ) )
              else
                 taui = tau( i )
              end if
              call stdlib${ii}$_clacgv( nq-k+i-1, a( i, 1_${ik}$ ), lda )
              aii = a( i, nq-k+i )
              a( i, nq-k+i ) = cone
              call stdlib${ii}$_clarf( side, mi, ni, a( i, 1_${ik}$ ), lda, taui, c, ldc, work )
              a( i, nq-k+i ) = aii
              call stdlib${ii}$_clacgv( nq-k+i-1, a( i, 1_${ik}$ ), lda )
           end do
           return
     end subroutine stdlib${ii}$_cunmr2

     pure module subroutine stdlib${ii}$_zunmr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! ZUNMR2 overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, mi, ni, nq
           complex(dp) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMR2', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = conjg( tau( i ) )
              else
                 taui = tau( i )
              end if
              call stdlib${ii}$_zlacgv( nq-k+i-1, a( i, 1_${ik}$ ), lda )
              aii = a( i, nq-k+i )
              a( i, nq-k+i ) = cone
              call stdlib${ii}$_zlarf( side, mi, ni, a( i, 1_${ik}$ ), lda, taui, c, ldc, work )
              a( i, nq-k+i ) = aii
              call stdlib${ii}$_zlacgv( nq-k+i-1, a( i, 1_${ik}$ ), lda )
           end do
           return
     end subroutine stdlib${ii}$_zunmr2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unmr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! ZUNMR2: overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, mi, ni, nq
           complex(${ck}$) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMR2', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = conjg( tau( i ) )
              else
                 taui = tau( i )
              end if
              call stdlib${ii}$_${ci}$lacgv( nq-k+i-1, a( i, 1_${ik}$ ), lda )
              aii = a( i, nq-k+i )
              a( i, nq-k+i ) = cone
              call stdlib${ii}$_${ci}$larf( side, mi, ni, a( i, 1_${ik}$ ), lda, taui, c, ldc, work )
              a( i, nq-k+i ) = aii
              call stdlib${ii}$_${ci}$lacgv( nq-k+i-1, a( i, 1_${ik}$ ), lda )
           end do
           return
     end subroutine stdlib${ii}$_${ci}$unmr2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungr2( m, n, k, a, lda, tau, work, info )
     !! CUNGR2 generates an m by n complex matrix Q with orthonormal rows,
     !! which is defined as the last m rows of a product of k elementary
     !! reflectors of order n
     !! Q  =  H(1)**H H(2)**H . . . H(k)**H
     !! as returned by CGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, ii, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGR2', -info )
              return
           end if
           ! quick return if possible
           if( m<=0 )return
           if( k<m ) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a( l, j ) = czero
                 end do
                 if( j>n-m .and. j<=n-k )a( m-n+j, j ) = cone
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i)**h to a(1:m-k+i,1:n-k+i) from the right
              call stdlib${ii}$_clacgv( n-m+ii-1, a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = cone
              call stdlib${ii}$_clarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1_${ik}$ ), lda,conjg( tau( i ) ), a, lda,&
                         work )
              call stdlib${ii}$_cscal( n-m+ii-1, -tau( i ), a( ii, 1_${ik}$ ), lda )
              call stdlib${ii}$_clacgv( n-m+ii-1, a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = cone - conjg( tau( i ) )
              ! set a(m-k+i,n-k+i+1:n) to czero
              do l = n - m + ii + 1, n
                 a( ii, l ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_cungr2

     pure module subroutine stdlib${ii}$_zungr2( m, n, k, a, lda, tau, work, info )
     !! ZUNGR2 generates an m by n complex matrix Q with orthonormal rows,
     !! which is defined as the last m rows of a product of k elementary
     !! reflectors of order n
     !! Q  =  H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, ii, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGR2', -info )
              return
           end if
           ! quick return if possible
           if( m<=0 )return
           if( k<m ) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a( l, j ) = czero
                 end do
                 if( j>n-m .and. j<=n-k )a( m-n+j, j ) = cone
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i)**h to a(1:m-k+i,1:n-k+i) from the right
              call stdlib${ii}$_zlacgv( n-m+ii-1, a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = cone
              call stdlib${ii}$_zlarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1_${ik}$ ), lda,conjg( tau( i ) ), a, lda,&
                         work )
              call stdlib${ii}$_zscal( n-m+ii-1, -tau( i ), a( ii, 1_${ik}$ ), lda )
              call stdlib${ii}$_zlacgv( n-m+ii-1, a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = cone - conjg( tau( i ) )
              ! set a(m-k+i,n-k+i+1:n) to czero
              do l = n - m + ii + 1, n
                 a( ii, l ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_zungr2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungr2( m, n, k, a, lda, tau, work, info )
     !! ZUNGR2: generates an m by n complex matrix Q with orthonormal rows,
     !! which is defined as the last m rows of a product of k elementary
     !! reflectors of order n
     !! Q  =  H(1)**H H(2)**H . . . H(k)**H
     !! as returned by ZGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, ii, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGR2', -info )
              return
           end if
           ! quick return if possible
           if( m<=0 )return
           if( k<m ) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a( l, j ) = czero
                 end do
                 if( j>n-m .and. j<=n-k )a( m-n+j, j ) = cone
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i)**h to a(1:m-k+i,1:n-k+i) from the right
              call stdlib${ii}$_${ci}$lacgv( n-m+ii-1, a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = cone
              call stdlib${ii}$_${ci}$larf( 'RIGHT', ii-1, n-m+ii, a( ii, 1_${ik}$ ), lda,conjg( tau( i ) ), a, lda,&
                         work )
              call stdlib${ii}$_${ci}$scal( n-m+ii-1, -tau( i ), a( ii, 1_${ik}$ ), lda )
              call stdlib${ii}$_${ci}$lacgv( n-m+ii-1, a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = cone - conjg( tau( i ) )
              ! set a(m-k+i,n-k+i+1:n) to czero
              do l = n - m + ii + 1, n
                 a( ii, l ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ci}$ungr2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgrq( m, n, k, a, lda, tau, work, lwork, info )
     !! SORGRQ generates an M-by-N real matrix Q with orthonormal rows,
     !! which is defined as the last M rows of a product of K elementary
     !! reflectors of order N
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by SGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORGRQ', ' ', m, n, k, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<max( 1_${ik}$, m ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m<=0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SORGRQ', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SORGRQ', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min( k, ( ( k-nx+nb-1 ) / nb )*nb )
              ! set a(1:m-kk,n-kk+1:n) to zero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the first or only block.
           call stdlib${ii}$_sorgr2( m-kk, n-kk, k-kk, a, lda, tau, work, iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min( nb, k-i+1 )
                 ii = m - k + i
                 if( ii>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_slarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h**t to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_slarfb( 'RIGHT', 'TRANSPOSE', 'BACKWARD', 'ROWWISE',ii-1, n-k+i+&
                              ib-1, ib, a( ii, 1_${ik}$ ), lda, work,ldwork, a, lda, work( ib+1 ), ldwork )
                 end if
                 ! apply h**t to columns 1:n-k+i+ib-1 of current block
                 call stdlib${ii}$_sorgr2( ib, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 ! set columns n-k+i+ib:n of current block to zero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a( j, l ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sorgrq

     pure module subroutine stdlib${ii}$_dorgrq( m, n, k, a, lda, tau, work, lwork, info )
     !! DORGRQ generates an M-by-N real matrix Q with orthonormal rows,
     !! which is defined as the last M rows of a product of K elementary
     !! reflectors of order N
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORGRQ', ' ', m, n, k, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<max( 1_${ik}$, m ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m<=0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DORGRQ', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORGRQ', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min( k, ( ( k-nx+nb-1 ) / nb )*nb )
              ! set a(1:m-kk,n-kk+1:n) to zero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the first or only block.
           call stdlib${ii}$_dorgr2( m-kk, n-kk, k-kk, a, lda, tau, work, iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min( nb, k-i+1 )
                 ii = m - k + i
                 if( ii>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_dlarft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h**t to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_dlarfb( 'RIGHT', 'TRANSPOSE', 'BACKWARD', 'ROWWISE',ii-1, n-k+i+&
                              ib-1, ib, a( ii, 1_${ik}$ ), lda, work,ldwork, a, lda, work( ib+1 ), ldwork )
                 end if
                 ! apply h**t to columns 1:n-k+i+ib-1 of current block
                 call stdlib${ii}$_dorgr2( ib, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 ! set columns n-k+i+ib:n of current block to zero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a( j, l ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dorgrq

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgrq( m, n, k, a, lda, tau, work, lwork, info )
     !! DORGRQ: generates an M-by-N real matrix Q with orthonormal rows,
     !! which is defined as the last M rows of a product of K elementary
     !! reflectors of order N
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORGRQ', ' ', m, n, k, -1_${ik}$ )
                 lwkopt = m*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<max( 1_${ik}$, m ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m<=0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = m
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DORGRQ', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORGRQ', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min( k, ( ( k-nx+nb-1 ) / nb )*nb )
              ! set a(1:m-kk,n-kk+1:n) to zero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the first or only block.
           call stdlib${ii}$_${ri}$orgr2( m-kk, n-kk, k-kk, a, lda, tau, work, iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min( nb, k-i+1 )
                 ii = m - k + i
                 if( ii>1_${ik}$ ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib${ii}$_${ri}$larft( 'BACKWARD', 'ROWWISE', n-k+i+ib-1, ib,a( ii, 1_${ik}$ ), lda, &
                              tau( i ), work, ldwork )
                    ! apply h**t to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib${ii}$_${ri}$larfb( 'RIGHT', 'TRANSPOSE', 'BACKWARD', 'ROWWISE',ii-1, n-k+i+&
                              ib-1, ib, a( ii, 1_${ik}$ ), lda, work,ldwork, a, lda, work( ib+1 ), ldwork )
                 end if
                 ! apply h**t to columns 1:n-k+i+ib-1 of current block
                 call stdlib${ii}$_${ri}$orgr2( ib, n-k+i+ib-1, ib, a( ii, 1_${ik}$ ), lda, tau( i ),work, iinfo )
                           
                 ! set columns n-k+i+ib:n of current block to zero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a( j, l ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$orgrq

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! SORMRQ overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by SGERQF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(${ik}$) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! compute the workspace requirements
              if( m==0_${ik}$ .or. n==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMRQ', side // trans, m, n,k, -1_${ik}$ ) )
                           
                 lwkopt = nw*nb + tsize
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORMRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SORMRQ', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_sormr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
              else
                 mi = m
              end if
              if( notran ) then
                 transt = 'T'
              else
                 transt = 'N'
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib${ii}$_slarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1_${ik}$ ), lda, tau( &
                           i ), work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1_${ik}$
                 else
                    ! h or h**t is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1_${ik}$
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_slarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1_${ik}$ ), &
                           lda, work( iwt ), ldt, c, ldc,work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_sormrq

     pure module subroutine stdlib${ii}$_dormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! DORMRQ overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(${ik}$) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              if( m==0_${ik}$ .or. n==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', side // trans, m, n,k, -1_${ik}$ ) )
                           
                 lwkopt = nw*nb + tsize
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORMRQ', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_dormr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
              else
                 mi = m
              end if
              if( notran ) then
                 transt = 'T'
              else
                 transt = 'N'
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib${ii}$_dlarft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1_${ik}$ ), lda, tau( &
                           i ), work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1_${ik}$
                 else
                    ! h or h**t is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1_${ik}$
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_dlarfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1_${ik}$ ), &
                           lda, work( iwt ), ldt, c, ldc,work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_dormrq

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ormrq( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! DORMRQ: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(${ik}$) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              if( m==0_${ik}$ .or. n==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', side // trans, m, n,k, -1_${ik}$ ) )
                           
                 lwkopt = nw*nb + tsize
              end if
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMRQ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORMRQ', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_${ri}$ormr2( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
              else
                 mi = m
              end if
              if( notran ) then
                 transt = 'T'
              else
                 transt = 'N'
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib${ii}$_${ri}$larft( 'BACKWARD', 'ROWWISE', nq-k+i+ib-1, ib,a( i, 1_${ik}$ ), lda, tau( &
                           i ), work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1_${ik}$
                 else
                    ! h or h**t is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1_${ik}$
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_${ri}$larfb( side, transt, 'BACKWARD', 'ROWWISE', mi, ni,ib, a( i, 1_${ik}$ ), &
                           lda, work( iwt ), ldt, c, ldc,work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ri}$ormrq

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sormr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! SORMR2 overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by SGERQF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, mi, ni, nq
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORMR2', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i)
              aii = a( i, nq-k+i )
              a( i, nq-k+i ) = one
              call stdlib${ii}$_slarf( side, mi, ni, a( i, 1_${ik}$ ), lda, tau( i ), c, ldc,work )
              a( i, nq-k+i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_sormr2

     pure module subroutine stdlib${ii}$_dormr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! DORMR2 overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, mi, ni, nq
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMR2', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i)
              aii = a( i, nq-k+i )
              a( i, nq-k+i ) = one
              call stdlib${ii}$_dlarf( side, mi, ni, a( i, 1_${ik}$ ), lda, tau( i ), c, ldc,work )
              a( i, nq-k+i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_dormr2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ormr2( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! DORMR2: overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, mi, ni, nq
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, k ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMR2', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i)
              aii = a( i, nq-k+i )
              a( i, nq-k+i ) = one
              call stdlib${ii}$_${ri}$larf( side, mi, ni, a( i, 1_${ik}$ ), lda, tau( i ), c, ldc,work )
              a( i, nq-k+i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_${ri}$ormr2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgr2( m, n, k, a, lda, tau, work, info )
     !! SORGR2 generates an m by n real matrix Q with orthonormal rows,
     !! which is defined as the last m rows of a product of k elementary
     !! reflectors of order n
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by SGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, ii, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGR2', -info )
              return
           end if
           ! quick return if possible
           if( m<=0 )return
           if( k<m ) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a( l, j ) = zero
                 end do
                 if( j>n-m .and. j<=n-k )a( m-n+j, j ) = one
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i) to a(1:m-k+i,1:n-k+i) from the right
              a( ii, n-m+ii ) = one
              call stdlib${ii}$_slarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1_${ik}$ ), lda, tau( i ),a, lda, work )
                        
              call stdlib${ii}$_sscal( n-m+ii-1, -tau( i ), a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = one - tau( i )
              ! set a(m-k+i,n-k+i+1:n) to zero
              do l = n - m + ii + 1, n
                 a( ii, l ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_sorgr2

     pure module subroutine stdlib${ii}$_dorgr2( m, n, k, a, lda, tau, work, info )
     !! DORGR2 generates an m by n real matrix Q with orthonormal rows,
     !! which is defined as the last m rows of a product of k elementary
     !! reflectors of order n
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, ii, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGR2', -info )
              return
           end if
           ! quick return if possible
           if( m<=0 )return
           if( k<m ) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a( l, j ) = zero
                 end do
                 if( j>n-m .and. j<=n-k )a( m-n+j, j ) = one
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i) to a(1:m-k+i,1:n-k+i) from the right
              a( ii, n-m+ii ) = one
              call stdlib${ii}$_dlarf( 'RIGHT', ii-1, n-m+ii, a( ii, 1_${ik}$ ), lda, tau( i ),a, lda, work )
                        
              call stdlib${ii}$_dscal( n-m+ii-1, -tau( i ), a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = one - tau( i )
              ! set a(m-k+i,n-k+i+1:n) to zero
              do l = n - m + ii + 1, n
                 a( ii, l ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_dorgr2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgr2( m, n, k, a, lda, tau, work, info )
     !! DORGR2: generates an m by n real matrix Q with orthonormal rows,
     !! which is defined as the last m rows of a product of k elementary
     !! reflectors of order n
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGERQF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, ii, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>m ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGR2', -info )
              return
           end if
           ! quick return if possible
           if( m<=0 )return
           if( k<m ) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a( l, j ) = zero
                 end do
                 if( j>n-m .and. j<=n-k )a( m-n+j, j ) = one
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i) to a(1:m-k+i,1:n-k+i) from the right
              a( ii, n-m+ii ) = one
              call stdlib${ii}$_${ri}$larf( 'RIGHT', ii-1, n-m+ii, a( ii, 1_${ik}$ ), lda, tau( i ),a, lda, work )
                        
              call stdlib${ii}$_${ri}$scal( n-m+ii-1, -tau( i ), a( ii, 1_${ik}$ ), lda )
              a( ii, n-m+ii ) = one - tau( i )
              ! set a(m-k+i,n-k+i+1:n) to zero
              do l = n - m + ii + 1, n
                 a( ii, l ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ri}$orgr2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! SGGRQF computes a generalized RQ factorization of an M-by-N matrix A
     !! and a P-by-N matrix B:
     !! A = R*Q,        B = Z*T*Q,
     !! where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
     !! matrix, and R and T assume one of the forms:
     !! if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
     !! N-M  M                           ( R21 ) N
     !! N
     !! where R12 or R21 is upper triangular, and
     !! if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
     !! (  0  ) P-N                         P   N-P
     !! N
     !! where T11 is upper triangular.
     !! In particular, if B is square and nonsingular, the GRQ factorization
     !! of A and B implicitly gives the RQ factorization of A*inv(B):
     !! A*inv(B) = (R*inv(T))*Z**T
     !! where inv(B) denotes the inverse of the matrix B, and Z**T denotes the
     !! transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', p, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMRQ', ' ', m, n, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p)*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( p<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, m, p, n ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGGRQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! rq factorization of m-by-n matrix a: a = r*q
           call stdlib${ii}$_sgerqf( m, n, a, lda, taua, work, lwork, info )
           lopt = work( 1_${ik}$ )
           ! update b := b*q**t
           call stdlib${ii}$_sormrq( 'RIGHT', 'TRANSPOSE', p, n, min( m, n ),a( max( 1_${ik}$, m-n+1 ), 1_${ik}$ ), &
                     lda, taua, b, ldb, work,lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! qr factorization of p-by-n matrix b: b = z*t
           call stdlib${ii}$_sgeqrf( p, n, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_sggrqf

     pure module subroutine stdlib${ii}$_dggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! DGGRQF computes a generalized RQ factorization of an M-by-N matrix A
     !! and a P-by-N matrix B:
     !! A = R*Q,        B = Z*T*Q,
     !! where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
     !! matrix, and R and T assume one of the forms:
     !! if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
     !! N-M  M                           ( R21 ) N
     !! N
     !! where R12 or R21 is upper triangular, and
     !! if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
     !! (  0  ) P-N                         P   N-P
     !! N
     !! where T11 is upper triangular.
     !! In particular, if B is square and nonsingular, the GRQ factorization
     !! of A and B implicitly gives the RQ factorization of A*inv(B):
     !! A*inv(B) = (R*inv(T))*Z**T
     !! where inv(B) denotes the inverse of the matrix B, and Z**T denotes the
     !! transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', p, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', ' ', m, n, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( p<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, m, p, n ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGGRQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! rq factorization of m-by-n matrix a: a = r*q
           call stdlib${ii}$_dgerqf( m, n, a, lda, taua, work, lwork, info )
           lopt = work( 1_${ik}$ )
           ! update b := b*q**t
           call stdlib${ii}$_dormrq( 'RIGHT', 'TRANSPOSE', p, n, min( m, n ),a( max( 1_${ik}$, m-n+1 ), 1_${ik}$ ), &
                     lda, taua, b, ldb, work,lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! qr factorization of p-by-n matrix b: b = z*t
           call stdlib${ii}$_dgeqrf( p, n, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_dggrqf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! DGGRQF: computes a generalized RQ factorization of an M-by-N matrix A
     !! and a P-by-N matrix B:
     !! A = R*Q,        B = Z*T*Q,
     !! where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
     !! matrix, and R and T assume one of the forms:
     !! if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
     !! N-M  M                           ( R21 ) N
     !! N
     !! where R12 or R21 is upper triangular, and
     !! if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
     !! (  0  ) P-N                         P   N-P
     !! N
     !! where T11 is upper triangular.
     !! In particular, if B is square and nonsingular, the GRQ factorization
     !! of A and B implicitly gives the RQ factorization of A*inv(B):
     !! A*inv(B) = (R*inv(T))*Z**T
     !! where inv(B) denotes the inverse of the matrix B, and Z**T denotes the
     !! transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', p, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', ' ', m, n, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( p<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, m, p, n ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGGRQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! rq factorization of m-by-n matrix a: a = r*q
           call stdlib${ii}$_${ri}$gerqf( m, n, a, lda, taua, work, lwork, info )
           lopt = work( 1_${ik}$ )
           ! update b := b*q**t
           call stdlib${ii}$_${ri}$ormrq( 'RIGHT', 'TRANSPOSE', p, n, min( m, n ),a( max( 1_${ik}$, m-n+1 ), 1_${ik}$ ), &
                     lda, taua, b, ldb, work,lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! qr factorization of p-by-n matrix b: b = z*t
           call stdlib${ii}$_${ri}$geqrf( p, n, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_${ri}$ggrqf

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! CGGRQF computes a generalized RQ factorization of an M-by-N matrix A
     !! and a P-by-N matrix B:
     !! A = R*Q,        B = Z*T*Q,
     !! where Q is an N-by-N unitary matrix, Z is a P-by-P unitary
     !! matrix, and R and T assume one of the forms:
     !! if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
     !! N-M  M                           ( R21 ) N
     !! N
     !! where R12 or R21 is upper triangular, and
     !! if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
     !! (  0  ) P-N                         P   N-P
     !! N
     !! where T11 is upper triangular.
     !! In particular, if B is square and nonsingular, the GRQ factorization
     !! of A and B implicitly gives the RQ factorization of A*inv(B):
     !! A*inv(B) = (R*inv(T))*Z**H
     !! where inv(B) denotes the inverse of the matrix B, and Z**H denotes the
     !! conjugate transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', p, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMRQ', ' ', m, n, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p)*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( p<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, m, p, n ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGGRQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! rq factorization of m-by-n matrix a: a = r*q
           call stdlib${ii}$_cgerqf( m, n, a, lda, taua, work, lwork, info )
           lopt = real( work( 1_${ik}$ ),KIND=sp)
           ! update b := b*q**h
           call stdlib${ii}$_cunmrq( 'RIGHT', 'CONJUGATE TRANSPOSE', p, n, min( m, n ),a( max( 1_${ik}$, m-n+1 &
                     ), 1_${ik}$ ), lda, taua, b, ldb, work,lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! qr factorization of p-by-n matrix b: b = z*t
           call stdlib${ii}$_cgeqrf( p, n, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_cggrqf

     pure module subroutine stdlib${ii}$_zggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! ZGGRQF computes a generalized RQ factorization of an M-by-N matrix A
     !! and a P-by-N matrix B:
     !! A = R*Q,        B = Z*T*Q,
     !! where Q is an N-by-N unitary matrix, Z is a P-by-P unitary
     !! matrix, and R and T assume one of the forms:
     !! if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
     !! N-M  M                           ( R21 ) N
     !! N
     !! where R12 or R21 is upper triangular, and
     !! if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
     !! (  0  ) P-N                         P   N-P
     !! N
     !! where T11 is upper triangular.
     !! In particular, if B is square and nonsingular, the GRQ factorization
     !! of A and B implicitly gives the RQ factorization of A*inv(B):
     !! A*inv(B) = (R*inv(T))*Z**H
     !! where inv(B) denotes the inverse of the matrix B, and Z**H denotes the
     !! conjugate transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', p, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', ' ', m, n, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( p<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, m, p, n ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGGRQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! rq factorization of m-by-n matrix a: a = r*q
           call stdlib${ii}$_zgerqf( m, n, a, lda, taua, work, lwork, info )
           lopt = real( work( 1_${ik}$ ),KIND=dp)
           ! update b := b*q**h
           call stdlib${ii}$_zunmrq( 'RIGHT', 'CONJUGATE TRANSPOSE', p, n, min( m, n ),a( max( 1_${ik}$, m-n+1 &
                     ), 1_${ik}$ ), lda, taua, b, ldb, work,lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! qr factorization of p-by-n matrix b: b = z*t
           call stdlib${ii}$_zgeqrf( p, n, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_zggrqf

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ggrqf( m, p, n, a, lda, taua, b, ldb, taub, work,lwork, info )
     !! ZGGRQF: computes a generalized RQ factorization of an M-by-N matrix A
     !! and a P-by-N matrix B:
     !! A = R*Q,        B = Z*T*Q,
     !! where Q is an N-by-N unitary matrix, Z is a P-by-P unitary
     !! matrix, and R and T assume one of the forms:
     !! if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
     !! N-M  M                           ( R21 ) N
     !! N
     !! where R12 or R21 is upper triangular, and
     !! if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
     !! (  0  ) P-N                         P   N-P
     !! N
     !! where T11 is upper triangular.
     !! In particular, if B is square and nonsingular, the GRQ factorization
     !! of A and B implicitly gives the RQ factorization of A*inv(B):
     !! A*inv(B) = (R*inv(T))*Z**H
     !! where inv(B) denotes the inverse of the matrix B, and Z**H denotes the
     !! conjugate transpose of the matrix Z.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, p
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: taua(*), taub(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: lopt, lwkopt, nb, nb1, nb2, nb3
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', p, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', ' ', m, n, p, -1_${ik}$ )
           nb = max( nb1, nb2, nb3 )
           lwkopt = max( n, m, p )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( p<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, p ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, m, p, n ) .and. .not.lquery ) then
              info = -11_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGGRQF', -info )
              return
           else if( lquery ) then
              return
           end if
           ! rq factorization of m-by-n matrix a: a = r*q
           call stdlib${ii}$_${ci}$gerqf( m, n, a, lda, taua, work, lwork, info )
           lopt = real( work( 1_${ik}$ ),KIND=${ck}$)
           ! update b := b*q**h
           call stdlib${ii}$_${ci}$unmrq( 'RIGHT', 'CONJUGATE TRANSPOSE', p, n, min( m, n ),a( max( 1_${ik}$, m-n+1 &
                     ), 1_${ik}$ ), lda, taua, b, ldb, work,lwork, info )
           lopt = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           ! qr factorization of p-by-n matrix b: b = z*t
           call stdlib${ii}$_${ci}$geqrf( p, n, b, ldb, taub, work, lwork, info )
           work( 1_${ik}$ ) = max( lopt, int( work( 1_${ik}$ ),KIND=${ik}$) )
           return
     end subroutine stdlib${ii}$_${ci}$ggrqf

#:endif
#:endfor


#:endfor
end submodule stdlib_lapack_orthogonal_factors_qr