stdlib_lapack_orthogonal_factors_qr.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_orthogonal_factors) stdlib_lapack_orthogonal_factors_qr
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_sgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! SGEQR computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if ( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_sgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_slatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_sgeqr

     pure module subroutine stdlib${ii}$_dgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! DGEQR computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_dgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_dlatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_dgeqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! DGEQR: computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_${ri}$geqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_${ri}$latsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_${ri}$geqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! CGEQR computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_cgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_clatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_cgeqr

     pure module subroutine stdlib${ii}$_zgeqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! ZGEQR computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min ( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_zgeqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_zlatsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_zgeqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqr( m, n, a, lda, t, tsize, work, lwork,info )
     !! ZGEQR: computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, tsize, lwork
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery, lminws, mint, minw
           integer(${ik}$) :: mb, nb, mintsz, nblcks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( tsize==-1_${ik}$ .or. tsize==-2_${ik}$ .or.lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           mint = .false.
           minw = .false.
           if( tsize==-2_${ik}$ .or. lwork==-2_${ik}$ ) then
             if( tsize/=-1_${ik}$ ) mint = .true.
             if( lwork/=-1_${ik}$ ) minw = .true.
           end if
           ! determine the block size
           if( min ( m, n )>0_${ik}$ ) then
             mb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 1_${ik}$, -1_${ik}$ )
             nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQR ', ' ', m, n, 2_${ik}$, -1_${ik}$ )
           else
             mb = m
             nb = 1_${ik}$
           end if
           if( mb>m .or. mb<=n ) mb = m
           if( nb>min( m, n ) .or. nb<1_${ik}$ ) nb = 1_${ik}$
           mintsz = n + 5_${ik}$
           if( mb>n .and. m>n ) then
             if( mod( m - n, mb - n )==0_${ik}$ ) then
               nblcks = ( m - n ) / ( mb - n )
             else
               nblcks = ( m - n ) / ( mb - n ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           ! determine if the workspace size satisfies minimal size
           lminws = .false.
           if( ( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) .or. lwork<nb*n ).and. ( lwork>=n ) .and. ( &
                     tsize>=mintsz ).and. ( .not.lquery ) ) then
             if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ) ) then
               lminws = .true.
               nb = 1_${ik}$
               mb = m
             end if
             if( lwork<nb*n ) then
               lminws = .true.
               nb = 1_${ik}$
             end if
           end if
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ ) then
             info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -4_${ik}$
           else if( tsize<max( 1_${ik}$, nb*n*nblcks + 5_${ik}$ ).and. ( .not.lquery ) .and. ( .not.lminws ) ) &
                     then
             info = -6_${ik}$
           else if( ( lwork<max( 1_${ik}$, n*nb ) ) .and. ( .not.lquery ).and. ( .not.lminws ) ) &
                     then
             info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
             if( mint ) then
               t( 1_${ik}$ ) = mintsz
             else
               t( 1_${ik}$ ) = nb*n*nblcks + 5_${ik}$
             end if
             t( 2_${ik}$ ) = mb
             t( 3_${ik}$ ) = nb
             if( minw ) then
               work( 1_${ik}$ ) = max( 1_${ik}$, n )
             else
               work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
             end if
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
             return
           end if
           ! the qr decomposition
           if( ( m<=n ) .or. ( mb<=n ) .or. ( mb>=m ) ) then
             call stdlib${ii}$_${ci}$geqrt( m, n, nb, a, lda, t( 6_${ik}$ ), nb, work, info )
           else
             call stdlib${ii}$_${ci}$latsqr( m, n, mb, nb, a, lda, t( 6_${ik}$ ), nb, work,lwork, info )
           end if
           work( 1_${ik}$ ) = max( 1_${ik}$, nb*n )
           return
     end subroutine stdlib${ii}$_${ci}$geqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! SGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (SGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           real(sp), intent(in) :: a(lda,*), t(*)
           real(sp), intent(inout) :: c(ldc,*)
           real(sp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_sgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_sgemqr

     pure module subroutine stdlib${ii}$_dgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! DGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           real(dp), intent(in) :: a(lda,*), t(*)
           real(dp), intent(inout) :: c(ldc,*)
           real(dp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_dgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_dgemqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! DGEMQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           real(${rk}$), intent(in) :: a(lda,*), t(*)
           real(${rk}$), intent(inout) :: c(ldc,*)
           real(${rk}$), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_${ri}$gemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_${ri}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_${ri}$gemqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! CGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (CGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           complex(sp), intent(in) :: a(lda,*), t(*)
           complex(sp), intent(inout) :: c(ldc,*)
           complex(sp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_cgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_cgemqr

     pure module subroutine stdlib${ii}$_zgemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! ZGEMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           complex(dp), intent(in) :: a(lda,*), t(*)
           complex(dp), intent(inout) :: c(ldc,*)
           complex(dp), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_zgemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_zgemqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gemqr( side, trans, m, n, k, a, lda, t, tsize,c, ldc, work, lwork, &
     !! ZGEMQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZGEQR)
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, tsize, lwork, ldc
           ! Array Arguments 
           complex(${ck}$), intent(in) :: a(lda,*), t(*)
           complex(${ck}$), intent(inout) :: c(ldc,*)
           complex(${ck}$), intent(out) :: work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: mb, nb, lw, nblcks, mn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork==-1_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           mb = int( t( 2_${ik}$ ),KIND=${ik}$)
           nb = int( t( 3_${ik}$ ),KIND=${ik}$)
           if( left ) then
             lw = n * nb
             mn = m
           else
             lw = mb * nb
             mn = n
           end if
           if( ( mb>k ) .and. ( mn>k ) ) then
             if( mod( mn - k, mb - k )==0_${ik}$ ) then
               nblcks = ( mn - k ) / ( mb - k )
             else
               nblcks = ( mn - k ) / ( mb - k ) + 1_${ik}$
             end if
           else
             nblcks = 1_${ik}$
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
             info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
             info = -2_${ik}$
           else if( m<0_${ik}$ ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>mn ) then
             info = -5_${ik}$
           else if( lda<max( 1_${ik}$, mn ) ) then
             info = -7_${ik}$
           else if( tsize<5_${ik}$ ) then
             info = -9_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
             info = -11_${ik}$
           else if( ( lwork<max( 1_${ik}$, lw ) ) .and. ( .not.lquery ) ) then
             info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
             work( 1_${ik}$ ) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGEMQR', -info )
             return
           else if( lquery ) then
             return
           end if
           ! quick return if possible
           if( min( m, n, k )==0_${ik}$ ) then
             return
           end if
           if( ( left .and. m<=k ) .or. ( right .and. n<=k ).or. ( mb<=k ) .or. ( mb>=max( m, n, &
                     k ) ) ) then
             call stdlib${ii}$_${ci}$gemqrt( side, trans, m, n, k, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, info &
                       )
           else
             call stdlib${ii}$_${ci}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t( 6_${ik}$ ),nb, c, ldc, work, &
                       lwork, info )
           end if
           work( 1_${ik}$ ) = lw
           return
     end subroutine stdlib${ii}$_${ci}$gemqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! SGEQRF computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_sgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_slarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_sgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgeqrf

     pure module subroutine stdlib${ii}$_dgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! DGEQRF computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_dgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_dlarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_dgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgeqrf

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqrf( m, n, a, lda, tau, work, lwork, info )
     !! DGEQRF: computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ri}$geqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ri}$larfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ri}$geqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$geqrf

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! CGEQRF computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_cgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_clarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_cgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cgeqrf

     pure module subroutine stdlib${ii}$_zgeqrf( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_zgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_zlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_zgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zgeqrf

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqrf( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQRF: computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           k = min( m, n )
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( .not.lquery ) then
              if( lwork<=0_${ik}$ .or. ( m>0_${ik}$ .and. lwork<max( 1_${ik}$, n ) ) )info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRF', -info )
              return
           else if( lquery ) then
              if( k==0_${ik}$ ) then
                 lwkopt = 1_${ik}$
              else
                 lwkopt = n*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              return
           end if
           ! quick return if possible
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ci}$geqr2( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ci}$larfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ci}$geqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$geqrf

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqr2( m, n, a, lda, tau, work, info )
     !! SGEQR2 computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgeqr2

     pure module subroutine stdlib${ii}$_dgeqr2( m, n, a, lda, tau, work, info )
     !! DGEQR2 computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgeqr2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqr2( m, n, a, lda, tau, work, info )
     !! DGEQR2: computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ri}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$geqr2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqr2( m, n, a, lda, tau, work, info )
     !! CGEQR2 computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_clarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgeqr2

     pure module subroutine stdlib${ii}$_zgeqr2( m, n, a, lda, tau, work, info )
     !! ZGEQR2 computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_zlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgeqr2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqr2( m, n, a, lda, tau, work, info )
     !! ZGEQR2: computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ci}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$geqr2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungqr( m, n, k, a, lda, tau, work, lwork, info )
     !! CUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by CGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CUNGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CUNGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to czero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_cung2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_clarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_cung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to czero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cungqr

     pure module subroutine stdlib${ii}$_zungqr( m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to czero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_zung2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_zlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_zung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to czero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zungqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungqr( m, n, k, a, lda, tau, work, lwork, info )
     !! ZUNGQR: generates an M-by-N complex matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to czero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = czero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_${ci}$ung2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ci}$larfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_${ci}$ung2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to czero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = czero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$ungqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cung2r( m, n, k, a, lda, tau, work, info )
     !! CUNG2R generates an m by n complex matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by CGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = czero
              end do
              a( j, j ) = cone
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_cscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = cone - tau( i )
              ! set a(1:i-1,i) to czero
              do l = 1, i - 1
                 a( l, i ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_cung2r

     pure module subroutine stdlib${ii}$_zung2r( m, n, k, a, lda, tau, work, info )
     !! ZUNG2R generates an m by n complex matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = czero
              end do
              a( j, j ) = cone
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_zscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = cone - tau( i )
              ! set a(1:i-1,i) to czero
              do l = 1, i - 1
                 a( l, i ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_zung2r

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ung2r( m, n, k, a, lda, tau, work, info )
     !! ZUNG2R: generates an m by n complex matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = czero
              end do
              a( j, j ) = cone
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_${ci}$scal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = cone - tau( i )
              ! set a(1:i-1,i) to czero
              do l = 1, i - 1
                 a( l, i ) = czero
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ci}$ung2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! CUNMQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**h is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_clarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_cunmqr

     pure module subroutine stdlib${ii}$_zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! ZUNMQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**h is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_zlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_zunmqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! ZUNMQR: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZUNMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_${ci}$unm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**h is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**h is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**h
                 call stdlib${ii}$_${ci}$larfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ci}$unmqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! CUNM2R overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(sp), intent(in) :: tau(*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           complex(sp) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = tau( i )
              else
                 taui = conjg( tau( i ) )
              end if
              aii = a( i, i )
              a( i, i ) = cone
              call stdlib${ii}$_clarf( side, mi, ni, a( i, i ), 1_${ik}$, taui, c( ic, jc ), ldc,work )
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_cunm2r

     pure module subroutine stdlib${ii}$_zunm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! ZUNM2R overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), c(ldc,*)
           complex(dp), intent(in) :: tau(*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           complex(dp) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = tau( i )
              else
                 taui = conjg( tau( i ) )
              end if
              aii = a( i, i )
              a( i, i ) = cone
              call stdlib${ii}$_zlarf( side, mi, ni, a( i, i ), 1_${ik}$, taui, c( ic, jc ), ldc,work )
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_zunm2r

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$unm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! ZUNM2R: overwrites the general complex m-by-n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**H* C  if SIDE = 'L' and TRANS = 'C', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**H if SIDE = 'R' and TRANS = 'C',
     !! where Q is a complex unitary matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*)
           complex(${ck}$), intent(in) :: tau(*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           complex(${ck}$) :: aii, taui
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran .or. .not.left .and. notran ) ) then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) or h(i)**h is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) or h(i)**h is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i) or h(i)**h
              if( notran ) then
                 taui = tau( i )
              else
                 taui = conjg( tau( i ) )
              end if
              aii = a( i, i )
              a( i, i ) = cone
              call stdlib${ii}$_${ci}$larf( side, mi, ni, a( i, i ), 1_${ik}$, taui, c( ic, jc ), ldc,work )
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_${ci}$unm2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgqr( m, n, k, a, lda, tau, work, lwork, info )
     !! SORGQR generates an M-by-N real matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by SGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SORGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SORGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_sorg2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_slarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_sorg2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sorgqr

     pure module subroutine stdlib${ii}$_dorgqr( m, n, k, a, lda, tau, work, lwork, info )
     !! DORGQR generates an M-by-N real matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_dorg2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_dlarfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_dorg2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dorgqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgqr( m, n, k, a, lda, tau, work, lwork, info )
     !! DORGQR: generates an M-by-N real matrix Q with orthonormal columns,
     !! which is defined as the first N columns of a product of K elementary
     !! reflectors of order M
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ )
           lwkopt = max( 1_${ik}$, n )*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORGQR', ' ', m, n, k, -1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ( ( k-nx-1 ) / nb )*nb
              kk = min( k, ki+nb )
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a( i, j ) = zero
                 end do
              end do
           else
              kk = 0_${ik}$
           end if
           ! use unblocked code for the last or only block.
           if( kk<n )call stdlib${ii}$_${ri}$org2r( m-kk, n-kk, k-kk, a( kk+1, kk+1 ), lda,tau( kk+1 ), work,&
                      iinfo )
           if( kk>0_${ik}$ ) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min( nb, k-i+1 )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ri}$larfb( 'LEFT', 'NO TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-&
                    i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), &
                              ldwork )
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib${ii}$_${ri}$org2r( m-i+1, ib, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a( l, j ) = zero
                    end do
                 end do
              end do
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$orgqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorg2r( m, n, k, a, lda, tau, work, info )
     !! SORG2R generates an m by n real matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by SGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = zero
              end do
              a( j, j ) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_sscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = one - tau( i )
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a( l, i ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_sorg2r

     pure module subroutine stdlib${ii}$_dorg2r( m, n, k, a, lda, tau, work, info )
     !! DORG2R generates an m by n real matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = zero
              end do
              a( j, j ) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_dscal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = one - tau( i )
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a( l, i ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_dorg2r

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$org2r( m, n, k, a, lda, tau, work, info )
     !! DORG2R: generates an m by n real matrix Q with orthonormal columns,
     !! which is defined as the first n columns of a product of k elementary
     !! reflectors of order m
     !! Q  =  H(1) H(2) . . . H(k)
     !! as returned by DGEQRF.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, j, l
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. n>m ) then
              info = -2_${ik}$
           else if( k<0_${ik}$ .or. k>n ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORG2R', -info )
              return
           end if
           ! quick return if possible
           if( n<=0 )return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a( l, j ) = zero
              end do
              a( j, j ) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if( i<n ) then
                 a( i, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
              end if
              if( i<m )call stdlib${ii}$_${ri}$scal( m-i, -tau( i ), a( i+1, i ), 1_${ik}$ )
              a( i, i ) = one - tau( i )
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a( l, i ) = zero
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ri}$org2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! SORMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_slarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_sormqr

     pure module subroutine stdlib${ii}$_dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! DORMQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_dlarfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_dormqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$ormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, info )
     !! DORMQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
     !! if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: nbmax = 64_${ik}$
           integer(${ik}$), parameter :: ldt = nbmax+1
           integer(${ik}$), parameter :: tsize = ldt*nbmax
           
           ! Local Scalars 
           logical(lk) :: left, lquery, notran
           integer(${ik}$) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           ! nq is the order of q and nw is the minimum dimension of work
           if( left ) then
              nq = m
              nw = max( 1_${ik}$, n )
           else
              nq = n
              nw = max( 1_${ik}$, m )
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           else if( lwork<nw .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info==0_${ik}$ ) then
              ! compute the workspace requirements
              nb = min( nbmax, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              lwkopt = nw*nb + tsize
              work( 1_${ik}$ ) = lwkopt
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORMQR', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ .or. k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           ldwork = nw
           if( nb>1_${ik}$ .and. nb<k ) then
              if( lwork<lwkopt ) then
                 nb = (lwork-tsize) / ldwork
                 nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DORMQR', side // trans, m, n, k,-1_${ik}$ ) )
              end if
           end if
           if( nb<nbmin .or. nb>=k ) then
              ! use unblocked code
              call stdlib${ii}$_${ri}$orm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,iinfo )
           else
              ! use blocked code
              iwt = 1_${ik}$ + nw*nb
              if( ( left .and. .not.notran ) .or.( .not.left .and. notran ) ) then
                 i1 = 1_${ik}$
                 i2 = k
                 i3 = nb
              else
                 i1 = ( ( k-1 ) / nb )*nb + 1_${ik}$
                 i2 = 1_${ik}$
                 i3 = -nb
              end if
              if( left ) then
                 ni = n
                 jc = 1_${ik}$
              else
                 mi = m
                 ic = 1_${ik}$
              end if
              do i = i1, i2, i3
                 ib = min( nb, k-i+1 )
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', nq-i+1, ib, a( i, i ),lda, tau( i ),&
                            work( iwt ), ldt )
                 if( left ) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1_${ik}$
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1_${ik}$
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib${ii}$_${ri}$larfb( side, trans, 'FORWARD', 'COLUMNWISE', mi, ni,ib, a( i, i ), &
                           lda, work( iwt ), ldt,c( ic, jc ), ldc, work, ldwork )
              end do
           end if
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ri}$ormqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! SORM2R overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), c(ldc,*)
           real(sp), intent(in) :: tau(*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i)
              aii = a( i, i )
              a( i, i ) = one
              call stdlib${ii}$_slarf( side, mi, ni, a( i, i ), 1_${ik}$, tau( i ), c( ic, jc ),ldc, work )
                        
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_sorm2r

     pure module subroutine stdlib${ii}$_dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! DORM2R overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), c(ldc,*)
           real(dp), intent(in) :: tau(*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i)
              aii = a( i, i )
              a( i, i ) = one
              call stdlib${ii}$_dlarf( side, mi, ni, a( i, i ), 1_${ik}$, tau( i ), c( ic, jc ),ldc, work )
                        
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_dorm2r

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orm2r( side, trans, m, n, k, a, lda, tau, c, ldc,work, info )
     !! DORM2R: overwrites the general real m by n matrix C with
     !! Q * C  if SIDE = 'L' and TRANS = 'N', or
     !! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     !! C * Q  if SIDE = 'R' and TRANS = 'N', or
     !! C * Q**T if SIDE = 'R' and TRANS = 'T',
     !! where Q is a real orthogonal matrix defined as the product of k
     !! elementary reflectors
     !! Q = H(1) H(2) . . . H(k)
     !! as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
     !! if SIDE = 'R'.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, lda, ldc, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*)
           real(${rk}$), intent(in) :: tau(*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: left, notran
           integer(${ik}$) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           left = stdlib_lsame( side, 'L' )
           notran = stdlib_lsame( trans, 'N' )
           ! nq is the order of q
           if( left ) then
              nq = m
           else
              nq = n
           end if
           if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then
              info = -1_${ik}$
           else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>nq ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, nq ) ) then
              info = -7_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -10_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORM2R', -info )
              return
           end if
           ! quick return if possible
           if( m==0 .or. n==0 .or. k==0 )return
           if( ( left .and. .not.notran ) .or. ( .not.left .and. notran ) )then
              i1 = 1_${ik}$
              i2 = k
              i3 = 1_${ik}$
           else
              i1 = k
              i2 = 1_${ik}$
              i3 = -1_${ik}$
           end if
           if( left ) then
              ni = n
              jc = 1_${ik}$
           else
              mi = m
              ic = 1_${ik}$
           end if
           do i = i1, i2, i3
              if( left ) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1_${ik}$
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1_${ik}$
                 jc = i
              end if
              ! apply h(i)
              aii = a( i, i )
              a( i, i ) = one
              call stdlib${ii}$_${ri}$larf( side, mi, ni, a( i, i ), 1_${ik}$, tau( i ), c( ic, jc ),ldc, work )
                        
              a( i, i ) = aii
           end do
           return
     end subroutine stdlib${ii}$_${ri}$orm2r

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! SGEQRT computes a blocked QR factorization of a real M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_sgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_sgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**t to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_slarfb( 'L', 'T', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgeqrt

     pure module subroutine stdlib${ii}$_dgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! DGEQRT computes a blocked QR factorization of a real M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_dgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_dgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**t to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_dlarfb( 'L', 'T', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgeqrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! DGEQRT: computes a blocked QR factorization of a real M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_${ri}$geqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_${ri}$geqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**t to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_${ri}$larfb( 'L', 'T', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$geqrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! CGEQRT computes a blocked QR factorization of a complex M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_cgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_cgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**h to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_clarfb( 'L', 'C', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgeqrt

     pure module subroutine stdlib${ii}$_zgeqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! ZGEQRT computes a blocked QR factorization of a complex M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_zgeqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_zgeqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**h to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_zlarfb( 'L', 'C', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgeqrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqrt( m, n, nb, a, lda, t, ldt, work, info )
     !! ZGEQRT: computes a blocked QR factorization of a complex M-by-N matrix A
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*), work(*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk), parameter :: use_recursive_qr = .true.
           integer(${ik}$) :: i, ib, iinfo, k
           
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>min(m,n) .and. min(m,n)>0_${ik}$ ) )then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldt<nb ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT', -info )
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0 ) return
           ! blocked loop of length k
           do i = 1, k,  nb
              ib = min( k-i+1, nb )
           ! compute the qr factorization of the current block a(i:m,i:i+ib-1)
              if( use_recursive_qr ) then
                 call stdlib${ii}$_${ci}$geqrt3( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              else
                 call stdlib${ii}$_${ci}$geqrt2( m-i+1, ib, a(i,i), lda, t(1_${ik}$,i), ldt, iinfo )
              end if
              if( i+ib<=n ) then
           ! update by applying h**h to a(i:m,i+ib:n) from the left
                 call stdlib${ii}$_${ci}$larfb( 'L', 'C', 'F', 'C', m-i+1, n-i-ib+1, ib,a( i, i ), lda, t( 1_${ik}$,&
                            i ), ldt,a( i, i+ib ), lda, work , n-i-ib+1 )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$geqrt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqrt2( m, n, a, lda, t, ldt, info )
     !! SGEQRT2 computes a QR factorization of a real M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_sgemv( 'T',m-i+1, n-i, one, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, zero, t( &
                           1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_sger( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), lda &
                           )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = one
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**t * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_sgemv( 'T', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, zero, t( 1_${ik}$, &
                        i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_strmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = zero
           end do
     end subroutine stdlib${ii}$_sgeqrt2

     pure module subroutine stdlib${ii}$_dgeqrt2( m, n, a, lda, t, ldt, info )
     !! DGEQRT2 computes a QR factorization of a real M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_dgemv( 'T',m-i+1, n-i, one, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, zero, t( &
                           1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_dger( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), lda &
                           )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = one
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**t * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_dgemv( 'T', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, zero, t( 1_${ik}$, &
                        i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_dtrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = zero
           end do
     end subroutine stdlib${ii}$_dgeqrt2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqrt2( m, n, a, lda, t, ldt, info )
     !! DGEQRT2: computes a QR factorization of a real M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_${ri}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_${ri}$gemv( 'T',m-i+1, n-i, one, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, zero, t( &
                           1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_${ri}$ger( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), lda &
                           )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = one
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**t * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_${ri}$gemv( 'T', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, zero, t( 1_${ik}$, &
                        i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_${ri}$trmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = zero
           end do
     end subroutine stdlib${ii}$_${ri}$geqrt2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqrt2( m, n, a, lda, t, ldt, info )
     !! CGEQRT2 computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_clarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = cone
                 ! w(1:n-i) := a(i:m,i+1:n)**h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_cgemv( 'C',m-i+1, n-i, cone, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, czero, t(&
                            1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)**h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_cgerc( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), &
                           lda )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = cone
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**h * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_cgemv( 'C', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, czero, t( 1_${ik}$,&
                         i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_ctrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = czero
           end do
     end subroutine stdlib${ii}$_cgeqrt2

     pure module subroutine stdlib${ii}$_zgeqrt2( m, n, a, lda, t, ldt, info )
     !! ZGEQRT2 computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_zlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = cone
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_zgemv( 'C',m-i+1, n-i, cone, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, czero, t(&
                            1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_zgerc( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), &
                           lda )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = cone
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**h * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_zgemv( 'C', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, czero, t( 1_${ik}$,&
                         i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_ztrmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = czero
           end do
     end subroutine stdlib${ii}$_zgeqrt2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqrt2( m, n, a, lda, t, ldt, info )
     !! ZGEQRT2: computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: aii, alpha
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( m<n ) then
              info = -1_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT2', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elem. refl. h(i) to annihilate a(i+1:m,i), tau(i) -> t(i,1)
              call stdlib${ii}$_${ci}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,t( i, 1_${ik}$ ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = cone
                 ! w(1:n-i) := a(i:m,i+1:n)^h * a(i:m,i) [w = t(:,n)]
                 call stdlib${ii}$_${ci}$gemv( 'C',m-i+1, n-i, cone, a( i, i+1 ), lda,a( i, i ), 1_${ik}$, czero, t(&
                            1_${ik}$, n ), 1_${ik}$ )
                 ! a(i:m,i+1:n) = a(i:m,i+1:n) + alpha*a(i:m,i)*w(1:n-1)^h
                 alpha = -conjg(t( i, 1_${ik}$ ))
                 call stdlib${ii}$_${ci}$gerc( m-i+1, n-i, alpha, a( i, i ), 1_${ik}$,t( 1_${ik}$, n ), 1_${ik}$, a( i, i+1 ), &
                           lda )
                 a( i, i ) = aii
              end if
           end do
           do i = 2, n
              aii = a( i, i )
              a( i, i ) = cone
              ! t(1:i-1,i) := alpha * a(i:m,1:i-1)**h * a(i:m,i)
              alpha = -t( i, 1_${ik}$ )
              call stdlib${ii}$_${ci}$gemv( 'C', m-i+1, i-1, alpha, a( i, 1_${ik}$ ), lda,a( i, i ), 1_${ik}$, czero, t( 1_${ik}$,&
                         i ), 1_${ik}$ )
              a( i, i ) = aii
              ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
              call stdlib${ii}$_${ci}$trmv( 'U', 'N', 'N', i-1, t, ldt, t( 1_${ik}$, i ), 1_${ik}$ )
                 ! t(i,i) = tau(i)
                 t( i, i ) = t( i, 1_${ik}$ )
                 t( i, 1_${ik}$) = czero
           end do
     end subroutine stdlib${ii}$_${ci}$geqrt2

#:endif
#:endfor



     pure recursive module subroutine stdlib${ii}$_sgeqrt3( m, n, a, lda, t, ldt, info )
     !! SGEQRT3 recursively computes a QR factorization of a real M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_slarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_sgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_strmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_sgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_strmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_sgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        one, a( j1, j1 ), lda )
              call stdlib${ii}$_strmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_sgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = (a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_strmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_sgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_strmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_sgeqrt3

     pure recursive module subroutine stdlib${ii}$_dgeqrt3( m, n, a, lda, t, ldt, info )
     !! DGEQRT3 recursively computes a QR factorization of a real M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_dlarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_dgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_dgemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_dtrmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_dgemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        one, a( j1, j1 ), lda )
              call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_dgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = (a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_dtrmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_dgemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_dtrmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_dgeqrt3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure recursive module subroutine stdlib${ii}$_${ri}$geqrt3( m, n, a, lda, t, ldt, info )
     !! DGEQRT3: recursively computes a QR factorization of a real M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_${ri}$larfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_${ri}$geqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', n1, n2, m-n1, one, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'T', 'N', n1, n2, one,t, ldt, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m-n1, n2, n1, -one, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        one, a( j1, j1 ), lda )
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'U', n1, n2, one,a, lda, t( 1_${ik}$, j1 ), ldt )
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_${ri}$geqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = (a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_${ri}$trmm( 'R', 'L', 'N', 'U', n1, n2, one,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', n1, n2, m-n, one, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        one, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', n1, n2, -one, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'N', 'N', n1, n2, one,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_${ri}$geqrt3

#:endif
#:endfor

     pure recursive module subroutine stdlib${ii}$_cgeqrt3( m, n, a, lda, t, ldt, info )
     !! CGEQRT3 recursively computes a QR factorization of a complex M-by-N matrix A,
     !! using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_clarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1**h
              call stdlib${ii}$_cgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1**h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_cgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_ctrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        cone, a( j1, j1 ), lda )
              call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2**h
              call stdlib${ii}$_cgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1**h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = conjg(a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_ctrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_cgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_ctrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_cgeqrt3

     pure recursive module subroutine stdlib${ii}$_zgeqrt3( m, n, a, lda, t, ldt, info )
     !! ZGEQRT3 recursively computes a QR factorization of a complex M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_zlarfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_zgeqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_zgemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_ztrmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        cone, a( j1, j1 ), lda )
              call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_zgeqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = conjg(a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_ztrmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_zgemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_ztrmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_zgeqrt3

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure recursive module subroutine stdlib${ii}$_${ci}$geqrt3( m, n, a, lda, t, ldt, info )
     !! ZGEQRT3: recursively computes a QR factorization of a complex M-by-N
     !! matrix A, using the compact WY representation of Q.
     !! Based on the algorithm of Elmroth and Gustavson,
     !! IBM J. Res. Develop. Vol 44 No. 4 July 2000.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, ldt
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: t(ldt,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, j, j1, n1, n2, iinfo
           ! Executable Statements 
           info = 0_${ik}$
           if( n < 0_${ik}$ ) then
              info = -2_${ik}$
           else if( m < n ) then
              info = -1_${ik}$
           else if( lda < max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( ldt < max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRT3', -info )
              return
           end if
           if( n==1_${ik}$ ) then
              ! compute householder transform when n=1
              call stdlib${ii}$_${ci}$larfg( m, a(1_${ik}$,1_${ik}$), a( min( 2_${ik}$, m ), 1_${ik}$ ), 1_${ik}$, t(1_${ik}$,1_${ik}$) )
           else
              ! otherwise, split a into blocks...
              n1 = n/2_${ik}$
              n2 = n-n1
              j1 = min( n1+1, n )
              i1 = min( n+1, m )
              ! compute a(1:m,1:n1) <- (y1,r1,t1), where q1 = i - y1 t1 y1^h
              call stdlib${ii}$_${ci}$geqrt3( m, n1, a, lda, t, ldt, iinfo )
              ! compute a(1:m,j1:n) = q1^h a(1:m,j1:n) [workspace: t(1:n1,j1:n)]
              do j=1,n2
                 do i=1,n1
                    t( i, j+n1 ) = a( i, j+n1 )
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', n1, n2, m-n1, cone, a( j1, 1_${ik}$ ), lda,a( j1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt)
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'C', 'N', n1, n2, cone,t, ldt, t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', m-n1, n2, n1, -cone, a( j1, 1_${ik}$ ), lda,t( 1_${ik}$, j1 ), ldt, &
                        cone, a( j1, j1 ), lda )
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'U', n1, n2, cone,a, lda, t( 1_${ik}$, j1 ), ldt )
                        
              do j=1,n2
                 do i=1,n1
                    a( i, j+n1 ) = a( i, j+n1 ) - t( i, j+n1 )
                 end do
              end do
              ! compute a(j1:m,j1:n) <- (y2,r2,t2) where q2 = i - y2 t2 y2^h
              call stdlib${ii}$_${ci}$geqrt3( m-n1, n2, a( j1, j1 ), lda,t( j1, j1 ), ldt, iinfo )
              ! compute t3 = t(1:n1,j1:n) = -t1 y1^h y2 t2
              do i=1,n1
                 do j=1,n2
                    t( i, j+n1 ) = conjg(a( j+n1, i ))
                 end do
              end do
              call stdlib${ii}$_${ci}$trmm( 'R', 'L', 'N', 'U', n1, n2, cone,a( j1, j1 ), lda, t( 1_${ik}$, j1 ), &
                        ldt )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', n1, n2, m-n, cone, a( i1, 1_${ik}$ ), lda,a( i1, j1 ), lda, &
                        cone, t( 1_${ik}$, j1 ), ldt )
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', n1, n2, -cone, t, ldt,t( 1_${ik}$, j1 ), ldt )
                        
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'N', 'N', n1, n2, cone,t( j1, j1 ), ldt, t( 1_${ik}$, j1 ), &
                        ldt )
              ! y = (y1,y2); r = [ r1  a(1:n1,j1:n) ];  t = [t1 t3]
                               ! [  0        r2     ]       [ 0 t2]
           end if
           return
     end subroutine stdlib${ii}$_${ci}$geqrt3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! SGEMQRT overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'T':   Q**T C            C Q**T
     !! where Q is a real orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**T
     !! generated using the compact WY representation as returned by SGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           real(sp), intent(in) :: v(ldv,*), t(ldt,*)
           real(sp), intent(inout) :: c(ldc,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'L', 'T', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_slarfb( 'R', 'T', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_sgemqrt

     pure module subroutine stdlib${ii}$_dgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! DGEMQRT overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'T':   Q**T C            C Q**T
     !! where Q is a real orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**T
     !! generated using the compact WY representation as returned by DGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           real(dp), intent(in) :: v(ldv,*), t(ldt,*)
           real(dp), intent(inout) :: c(ldc,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'L', 'T', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_dlarfb( 'R', 'T', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_dgemqrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$gemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! DGEMQRT: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'T':   Q**T C            C Q**T
     !! where Q is a real orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**T
     !! generated using the compact WY representation as returned by DGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           real(${rk}$), intent(in) :: v(ldv,*), t(ldt,*)
           real(${rk}$), intent(inout) :: c(ldc,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'T' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'L', 'T', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ri}$larfb( 'R', 'T', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ri}$gemqrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! CGEMQRT overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'C':    Q**H C            C Q**H
     !! where Q is a complex orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**H
     !! generated using the compact WY representation as returned by CGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           complex(sp), intent(in) :: v(ldv,*), t(ldt,*)
           complex(sp), intent(inout) :: c(ldc,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'L', 'C', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_clarfb( 'R', 'C', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_cgemqrt

     pure module subroutine stdlib${ii}$_zgemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! ZGEMQRT overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'C':    Q**H C            C Q**H
     !! where Q is a complex orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**H
     !! generated using the compact WY representation as returned by ZGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           complex(dp), intent(in) :: v(ldv,*), t(ldt,*)
           complex(dp), intent(inout) :: c(ldc,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'L', 'C', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_zlarfb( 'R', 'C', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_zgemqrt

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$gemqrt( side, trans, m, n, k, nb, v, ldv, t, ldt,c, ldc, work, info )
     !! ZGEMQRT: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q C            C Q
     !! TRANS = 'C':    Q**H C            C Q**H
     !! where Q is a complex orthogonal matrix defined as the product of K
     !! elementary reflectors:
     !! Q = H(1) H(2) . . . H(K) = I - V T V**H
     !! generated using the compact WY representation as returned by ZGEQRT.
     !! Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldv, ldc, m, n, nb, ldt
           ! Array Arguments 
           complex(${ck}$), intent(in) :: v(ldv,*), t(ldt,*)
           complex(${ck}$), intent(inout) :: c(ldc,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran
           integer(${ik}$) :: i, ib, ldwork, kf, q
           ! Intrinsic Functions 
           ! Executable Statements 
           ! Test The Input Arguments 
           info   = 0_${ik}$
           left   = stdlib_lsame( side,  'L' )
           right  = stdlib_lsame( side,  'R' )
           tran   = stdlib_lsame( trans, 'C' )
           notran = stdlib_lsame( trans, 'N' )
           if( left ) then
              ldwork = max( 1_${ik}$, n )
              q = m
           else if ( right ) then
              ldwork = max( 1_${ik}$, m )
              q = n
           end if
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( k<0_${ik}$ .or. k>q ) then
              info = -5_${ik}$
           else if( nb<1_${ik}$ .or. (nb>k .and. k>0_${ik}$)) then
              info = -6_${ik}$
           else if( ldv<max( 1_${ik}$, q ) ) then
              info = -8_${ik}$
           else if( ldt<nb ) then
              info = -10_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEMQRT', -info )
              return
           end if
           ! Quick Return If Possible 
           if( m==0 .or. n==0 .or. k==0 ) return
           if( left .and. tran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'L', 'C', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. notran ) then
              do i = 1, k, nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'R', 'N', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           else if( left .and. notran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'L', 'N', 'F', 'C', m-i+1, n, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( i, 1_${ik}$ ), ldc, work, ldwork )
              end do
           else if( right .and. tran ) then
              kf = ((k-1)/nb)*nb+1
              do i = kf, 1, -nb
                 ib = min( nb, k-i+1 )
                 call stdlib${ii}$_${ci}$larfb( 'R', 'C', 'F', 'C', m, n-i+1, ib,v( i, i ), ldv, t( 1_${ik}$, i ), &
                           ldt,c( 1_${ik}$, i ), ldc, work, ldwork )
              end do
           end if
           return
     end subroutine stdlib${ii}$_${ci}$gemqrt

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! SGEQR2P computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_sgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_slarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_slarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_sgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgeqrfp

     module subroutine stdlib${ii}$_dgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! DGEQR2P computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_dgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_dlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_dlarfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_dgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgeqrfp

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$geqrfp( m, n, a, lda, tau, work, lwork, info )
     !! DGEQR2P computes a QR factorization of a real M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ri}$geqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ri}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**t to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ri}$larfb( 'LEFT', 'TRANSPOSE', 'FORWARD','COLUMNWISE', m-i+1, n-i-&
                    ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 ), ldwork &
                              )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ri}$geqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$geqrfp

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! CGEQR2P computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_cgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_clarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_clarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_cgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_cgeqrfp

     module subroutine stdlib${ii}$_zgeqrfp( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQR2P computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_zgeqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_zlarft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_zlarfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_zgeqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_zgeqrfp

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$geqrfp( m, n, a, lda, tau, work, lwork, info )
     !! ZGEQR2P computes a QR factorization of a complex M-by-N matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix;
     !! R is an upper-triangular N-by-N matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (M-N)-by-N zero matrix, if M > N.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ib, iinfo, iws, k, ldwork, lwkopt, nb, nbmin, nx
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           lwkopt = n*nb
           work( 1_${ik}$ ) = lwkopt
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           else if( lwork<max( 1_${ik}$, n ) .and. .not.lquery ) then
              info = -7_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQRFP', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           k = min( m, n )
           if( k==0_${ik}$ ) then
              work( 1_${ik}$ ) = 1_${ik}$
              return
           end if
           nbmin = 2_${ik}$
           nx = 0_${ik}$
           iws = n
           if( nb>1_${ik}$ .and. nb<k ) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) )
              if( nx<k ) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if( lwork<iws ) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork / ldwork
                    nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) )
                 end if
              end if
           end if
           if( nb>=nbmin .and. nb<k .and. nx<k ) then
              ! use blocked code initially
              do i = 1, k - nx, nb
                 ib = min( k-i+1, nb )
                 ! compute the qr factorization of the current block
                 ! a(i:m,i:i+ib-1)
                 call stdlib${ii}$_${ci}$geqr2p( m-i+1, ib, a( i, i ), lda, tau( i ), work,iinfo )
                 if( i+ib<=n ) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib${ii}$_${ci}$larft( 'FORWARD', 'COLUMNWISE', m-i+1, ib,a( i, i ), lda, tau( i &
                              ), work, ldwork )
                    ! apply h**h to a(i:m,i+ib:n) from the left
                    call stdlib${ii}$_${ci}$larfb( 'LEFT', 'CONJUGATE TRANSPOSE', 'FORWARD','COLUMNWISE', m-&
                    i+1, n-i-ib+1, ib,a( i, i ), lda, work, ldwork, a( i, i+ib ),lda, work( ib+1 )&
                              , ldwork )
                 end if
              end do
           else
              i = 1_${ik}$
           end if
           ! use unblocked code to factor the last or only block.
           if( i<=k )call stdlib${ii}$_${ci}$geqr2p( m-i+1, n-i+1, a( i, i ), lda, tau( i ), work,iinfo )
                     
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ci}$geqrfp

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgeqr2p( m, n, a, lda, tau, work, info )
     !! SGEQR2P computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_slarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_sgeqr2p

     module subroutine stdlib${ii}$_dgeqr2p( m, n, a, lda, tau, work, info )
     !! DGEQR2P computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_dlarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_dgeqr2p

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$geqr2p( m, n, a, lda, tau, work, info )
     !! DGEQR2P: computes a QR factorization of a real m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           real(${rk}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ri}$larfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i) to a(i:m,i+1:n) from the left
                 aii = a( i, i )
                 a( i, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tau( i ),a( i, i+1 ), lda, &
                           work )
                 a( i, i ) = aii
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$geqr2p

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgeqr2p( m, n, a, lda, tau, work, info )
     !! CGEQR2P computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(sp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_clarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_cgeqr2p

     module subroutine stdlib${ii}$_zgeqr2p( m, n, a, lda, tau, work, info )
     !! ZGEQR2P computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(dp) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_zlarfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_zgeqr2p

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$geqr2p( m, n, a, lda, tau, work, info )
     !! ZGEQR2P: computes a QR factorization of a complex m-by-n matrix A:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a m-by-m orthogonal matrix;
     !! R is an upper-triangular n-by-n matrix with nonnegative diagonal
     !! entries;
     !! 0 is a (m-n)-by-n zero matrix, if m > n.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, k
           complex(${ck}$) :: alpha
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQR2P', -info )
              return
           end if
           k = min( m, n )
           do i = 1, k
              ! generate elementary reflector h(i) to annihilate a(i+1:m,i)
              call stdlib${ii}$_${ci}$larfgp( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tau( i ) )
              if( i<n ) then
                 ! apply h(i)**h to a(i:m,i+1:n) from the left
                 alpha = a( i, i )
                 a( i, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tau( i ) ), a( i, i+1 &
                           ), lda, work )
                 a( i, i ) = alpha
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ci}$geqr2p

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info )
     !! SGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = 3_${ik}$*n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = 2_${ik}$*n + ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_sswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_sgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_sgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_sorm2r( 'left', 'transpose', m, n-na, na, a, lda,
      ! cc  $                   tau, a( 1, na+1 ), lda, work, info )
                 call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', m, n-na, na, a, lda, tau,a( 1_${ik}$, na+1 ), &
                           lda, work, lwork, info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'SGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'SGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = 2_${ik}$*sn + ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = ( lwork-2*sn ) / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'SGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 work( j ) = stdlib${ii}$_snrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 work( n+j ) = work( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_slaqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , work( j ), work( n+j ),work( 2_${ik}$*n+1 ), work( 2_${ik}$*n+jb+1 ), n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_slaqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         work( j ), work( n+j ),work( 2_${ik}$*n+1 ) )
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_sgeqp3

     pure module subroutine stdlib${ii}$_dgeqp3( m, n, a, lda, jpvt, tau, work, lwork, info )
     !! DGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = 3_${ik}$*n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = 2_${ik}$*n + ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_dswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_dgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_dgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_dorm2r( 'left', 'transpose', m, n-na, na, a, lda,
      ! cc  $                   tau, a( 1, na+1 ), lda, work, info )
                 call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', m, n-na, na, a, lda, tau,a( 1_${ik}$, na+1 ), &
                           lda, work, lwork, info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'DGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = 2_${ik}$*sn + ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = ( lwork-2*sn ) / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'DGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 work( j ) = stdlib${ii}$_dnrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 work( n+j ) = work( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_dlaqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , work( j ), work( n+j ),work( 2_${ik}$*n+1 ), work( 2_${ik}$*n+jb+1 ), n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_dlaqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         work( j ), work( n+j ),work( 2_${ik}$*n+1 ) )
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_dgeqp3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$geqp3( m, n, a, lda, jpvt, tau, work, lwork, info )
     !! DGEQP3: computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = 3_${ik}$*n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = 2_${ik}$*n + ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = lwkopt
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_${ri}$geqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_${ri}$geqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_${ri}$orm2r( 'left', 'transpose', m, n-na, na, a, lda,
      ! cc  $                   tau, a( 1, na+1 ), lda, work, info )
                 call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', m, n-na, na, a, lda, tau,a( 1_${ik}$, na+1 ), &
                           lda, work, lwork, info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'DGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'DGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = 2_${ik}$*sn + ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = ( lwork-2*sn ) / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'DGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 work( j ) = stdlib${ii}$_${ri}$nrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 work( n+j ) = work( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_${ri}$laqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , work( j ), work( n+j ),work( 2_${ik}$*n+1 ), work( 2_${ik}$*n+jb+1 ), n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_${ri}$laqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         work( j ), work( n+j ),work( 2_${ik}$*n+1 ) )
           end if
           work( 1_${ik}$ ) = iws
           return
     end subroutine stdlib${ii}$_${ri}$geqp3

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info )
     !! CGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(out) :: rwork(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp)
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_cswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_cgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_cgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_cunm2r( 'left', 'conjugate transpose', m, n-na,
      ! cc  $                   na, a, lda, tau, a( 1, na+1 ), lda, work,
      ! cc  $                   info )
                 call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, n-na, na, a,lda, tau, a( 1_${ik}$,&
                            na+1 ), lda, work, lwork,info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'CGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'CGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = lwork / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'CGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 rwork( j ) = stdlib${ii}$_scnrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 rwork( n+j ) = rwork( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_claqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , rwork( j ),rwork( n+j ), work( 1_${ik}$ ), work( jb+1 ),n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_claqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         rwork( j ), rwork( n+j ), work( 1_${ik}$ ) )
           end if
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cgeqp3

     pure module subroutine stdlib${ii}$_zgeqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info )
     !! ZGEQP3 computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(out) :: rwork(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp)
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_zswap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_zgeqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_zgeqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_zunm2r( 'left', 'conjugate transpose', m, n-na,
      ! cc  $                   na, a, lda, tau, a( 1, na+1 ), lda, work,
      ! cc  $                   info )
                 call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, n-na, na, a,lda, tau, a( 1_${ik}$,&
                            na+1 ), lda, work, lwork,info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'ZGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = lwork / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'ZGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 rwork( j ) = stdlib${ii}$_dznrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 rwork( n+j ) = rwork( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_zlaqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , rwork( j ),rwork( n+j ), work( 1_${ik}$ ), work( jb+1 ),n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_zlaqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         rwork( j ), rwork( n+j ), work( 1_${ik}$ ) )
           end if
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zgeqp3

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$geqp3( m, n, a, lda, jpvt, tau, work, lwork, rwork,info )
     !! ZGEQP3: computes a QR factorization with column pivoting of a
     !! matrix A:  A*P = Q*R  using Level 3 BLAS.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, lwork, m, n
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(out) :: rwork(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: inb = 1_${ik}$
           integer(${ik}$), parameter :: inbmin = 2_${ik}$
           integer(${ik}$), parameter :: ixover = 3_${ik}$
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: fjb, iws, j, jb, lwkopt, minmn, minws, na, nb, nbmin, nfxd, nx, sm, &
                     sminmn, sn, topbmn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test input arguments
        ! ====================
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -4_${ik}$
           end if
           if( info==0_${ik}$ ) then
              minmn = min( m, n )
              if( minmn==0_${ik}$ ) then
                 iws = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 iws = n + 1_${ik}$
                 nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 lwkopt = ( n + 1_${ik}$ )*nb
              end if
              work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$)
              if( ( lwork<iws ) .and. .not.lquery ) then
                 info = -8_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGEQP3', -info )
              return
           else if( lquery ) then
              return
           end if
           ! move initial columns up front.
           nfxd = 1_${ik}$
           do j = 1, n
              if( jpvt( j )/=0_${ik}$ ) then
                 if( j/=nfxd ) then
                    call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, j ), 1_${ik}$, a( 1_${ik}$, nfxd ), 1_${ik}$ )
                    jpvt( j ) = jpvt( nfxd )
                    jpvt( nfxd ) = j
                 else
                    jpvt( j ) = j
                 end if
                 nfxd = nfxd + 1_${ik}$
              else
                 jpvt( j ) = j
              end if
           end do
           nfxd = nfxd - 1_${ik}$
           ! factorize fixed columns
        ! =======================
           ! compute the qr factorization of fixed columns and update
           ! remaining columns.
           if( nfxd>0_${ik}$ ) then
              na = min( m, nfxd )
      ! cc      call stdlib${ii}$_${ci}$geqr2( m, na, a, lda, tau, work, info )
              call stdlib${ii}$_${ci}$geqrf( m, na, a, lda, tau, work, lwork, info )
              iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              if( na<n ) then
      ! cc         call stdlib${ii}$_${ci}$unm2r( 'left', 'conjugate transpose', m, n-na,
      ! cc  $                   na, a, lda, tau, a( 1, na+1 ), lda, work,
      ! cc  $                   info )
                 call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, n-na, na, a,lda, tau, a( 1_${ik}$,&
                            na+1 ), lda, work, lwork,info )
                 iws = max( iws, int( work( 1_${ik}$ ),KIND=${ik}$) )
              end if
           end if
           ! factorize free columns
        ! ======================
           if( nfxd<minmn ) then
              sm = m - nfxd
              sn = n - nfxd
              sminmn = minmn - nfxd
              ! determine the block size.
              nb = stdlib${ii}$_ilaenv( inb, 'ZGEQRF', ' ', sm, sn, -1_${ik}$, -1_${ik}$ )
              nbmin = 2_${ik}$
              nx = 0_${ik}$
              if( ( nb>1_${ik}$ ) .and. ( nb<sminmn ) ) then
                 ! determine when to cross over from blocked to unblocked code.
                 nx = max( 0_${ik}$, stdlib${ii}$_ilaenv( ixover, 'ZGEQRF', ' ', sm, sn, -1_${ik}$,-1_${ik}$ ) )
                 if( nx<sminmn ) then
                    ! determine if workspace is large enough for blocked code.
                    minws = ( sn+1 )*nb
                    iws = max( iws, minws )
                    if( lwork<minws ) then
                       ! not enough workspace to use optimal nb: reduce nb and
                       ! determine the minimum value of nb.
                       nb = lwork / ( sn+1 )
                       nbmin = max( 2_${ik}$, stdlib${ii}$_ilaenv( inbmin, 'ZGEQRF', ' ', sm, sn,-1_${ik}$, -1_${ik}$ ) )
                                 
                    end if
                 end if
              end if
              ! initialize partial column norms. the first n elements of work
              ! store the exact column norms.
              do j = nfxd + 1, n
                 rwork( j ) = stdlib${ii}$_${c2ri(ci)}$znrm2( sm, a( nfxd+1, j ), 1_${ik}$ )
                 rwork( n+j ) = rwork( j )
              end do
              if( ( nb>=nbmin ) .and. ( nb<sminmn ) .and.( nx<sminmn ) ) then
                 ! use blocked code initially.
                 j = nfxd + 1_${ik}$
                 ! compute factorization: while loop.
                 topbmn = minmn - nx
                 30 continue
                 if( j<=topbmn ) then
                    jb = min( nb, topbmn-j+1 )
                    ! factorize jb columns among columns j:n.
                    call stdlib${ii}$_${ci}$laqps( m, n-j+1, j-1, jb, fjb, a( 1_${ik}$, j ), lda,jpvt( j ), tau( j )&
                              , rwork( j ),rwork( n+j ), work( 1_${ik}$ ), work( jb+1 ),n-j+1 )
                    j = j + fjb
                    go to 30
                 end if
              else
                 j = nfxd + 1_${ik}$
              end if
              ! use unblocked code to factor the last or only block.
              if( j<=minmn )call stdlib${ii}$_${ci}$laqp2( m, n-j+1, j-1, a( 1_${ik}$, j ), lda, jpvt( j ),tau( j ),&
                         rwork( j ), rwork( n+j ), work( 1_${ik}$ ) )
           end if
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$geqp3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! SLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*), vn1(*), vn2(*)
           real(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(sp) :: aii, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_isamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_sswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_slarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_slarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**t to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = one
                 call stdlib${ii}$_slarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,tau( i ), a( offpi, &
                           i+1 ), lda, work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_snrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_slaqp2

     pure module subroutine stdlib${ii}$_dlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! DLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*), vn1(*), vn2(*)
           real(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(dp) :: aii, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_idamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_dswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_dlarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_dlarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**t to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = one
                 call stdlib${ii}$_dlarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,tau( i ), a( offpi, &
                           i+1 ), lda, work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_dnrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_dlaqp2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! DLAQP2: computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*), vn1(*), vn2(*)
           real(${rk}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(${rk}$) :: aii, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_${ri}$lamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_i${ri}$amax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_${ri}$larfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_${ri}$larfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**t to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = one
                 call stdlib${ii}$_${ri}$larf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,tau( i ), a( offpi, &
                           i+1 ), lda, work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_${ri}$nrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_${ri}$laqp2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! CLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: vn1(*), vn2(*)
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(sp) :: temp, temp2, tol3z
           complex(sp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_isamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_cswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_clarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_clarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**h to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = cone
                 call stdlib${ii}$_clarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,conjg( tau( i ) ), a(&
                            offpi, i+1 ), lda,work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_scnrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_claqp2

     pure module subroutine stdlib${ii}$_zlaqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! ZLAQP2 computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: vn1(*), vn2(*)
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(dp) :: temp, temp2, tol3z
           complex(dp) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_idamax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_zswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_zlarfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_zlarfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**h to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = cone
                 call stdlib${ii}$_zlarf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,conjg( tau( i ) ), a(&
                            offpi, i+1 ), lda,work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_dznrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_zlaqp2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqp2( m, n, offset, a, lda, jpvt, tau, vn1, vn2,work )
     !! ZLAQP2: computes a QR factorization with column pivoting of
     !! the block A(OFFSET+1:M,1:N).
     !! The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, m, n, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(inout) :: vn1(*), vn2(*)
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: tau(*), work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: i, itemp, j, mn, offpi, pvt
           real(${ck}$) :: temp, temp2, tol3z
           complex(${ck}$) :: aii
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m-offset, n )
           tol3z = sqrt(stdlib${ii}$_${c2ri(ci)}$lamch('EPSILON'))
           ! compute factorization.
           loop_20: do i = 1, mn
              offpi = offset + i
              ! determine ith pivot column and swap if necessary.
              pvt = ( i-1 ) + stdlib${ii}$_i${c2ri(ci)}$amax( n-i+1, vn1( i ), 1_${ik}$ )
              if( pvt/=i ) then
                 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, i ), 1_${ik}$ )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( i )
                 jpvt( i ) = itemp
                 vn1( pvt ) = vn1( i )
                 vn2( pvt ) = vn2( i )
              end if
              ! generate elementary reflector h(i).
              if( offpi<m ) then
                 call stdlib${ii}$_${ci}$larfg( m-offpi+1, a( offpi, i ), a( offpi+1, i ), 1_${ik}$,tau( i ) )
                           
              else
                 call stdlib${ii}$_${ci}$larfg( 1_${ik}$, a( m, i ), a( m, i ), 1_${ik}$, tau( i ) )
              end if
              if( i<n ) then
                 ! apply h(i)**h to a(offset+i:m,i+1:n) from the left.
                 aii = a( offpi, i )
                 a( offpi, i ) = cone
                 call stdlib${ii}$_${ci}$larf( 'LEFT', m-offpi+1, n-i, a( offpi, i ), 1_${ik}$,conjg( tau( i ) ), a(&
                            offpi, i+1 ), lda,work( 1_${ik}$ ) )
                 a( offpi, i ) = aii
              end if
              ! update partial column norms.
              do j = i + 1, n
                 if( vn1( j )/=zero ) then
                    ! note: the following 4 lines follow from the analysis in
                    ! lapack working note 176.
                    temp = one - ( abs( a( offpi, j ) ) / vn1( j ) )**2_${ik}$
                    temp = max( temp, zero )
                    temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                    if( temp2 <= tol3z ) then
                       if( offpi<m ) then
                          vn1( j ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m-offpi, a( offpi+1, j ), 1_${ik}$ )
                          vn2( j ) = vn1( j )
                       else
                          vn1( j ) = zero
                          vn2( j ) = zero
                       end if
                    else
                       vn1( j ) = vn1( j )*sqrt( temp )
                    end if
                 end if
              end do
           end do loop_20
           return
     end subroutine stdlib${ii}$_${ci}$laqp2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! SLAQPS computes a step of QR factorization with column pivoting
     !! of a real M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*)
           real(sp), intent(out) :: tau(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(sp) :: akk, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_isamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_sswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_sswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**t.
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ), &
                           ldf, one, a( rk, k ), 1_${ik}$ )
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_slarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_slarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = one
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**t*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_sgemv( 'TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), lda, a( rk, &
                           k ), 1_${ik}$, zero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = zero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**t
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_sgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1_${ik}$ ),lda, a( rk, k &
                           ), 1_${ik}$, zero, auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n, k-1, one, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, one,&
                            f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t.
              if( k<n ) then
                 call stdlib${ii}$_sgemv( 'NO TRANSPOSE', n-k, k, -one, f( k+1, 1_${ik}$ ), ldf,a( rk, 1_${ik}$ ), &
                           lda, one, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=sp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**t.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-rk, n-kb, kb, -one,a( rk+1, 1_${ik}$ ), &
                        lda, f( kb+1, 1_${ik}$ ), ldf, one,a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           40 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_snrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 40
           end if
           return
     end subroutine stdlib${ii}$_slaqps

     pure module subroutine stdlib${ii}$_dlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! DLAQPS computes a step of QR factorization with column pivoting
     !! of a real M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*)
           real(dp), intent(out) :: tau(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(dp) :: akk, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_idamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_dswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_dswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**t.
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ), &
                           ldf, one, a( rk, k ), 1_${ik}$ )
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_dlarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_dlarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = one
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**t*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_dgemv( 'TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), lda, a( rk, &
                           k ), 1_${ik}$, zero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = zero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**t
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_dgemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1_${ik}$ ),lda, a( rk, k &
                           ), 1_${ik}$, zero, auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n, k-1, one, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, one,&
                            f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t.
              if( k<n ) then
                 call stdlib${ii}$_dgemv( 'NO TRANSPOSE', n-k, k, -one, f( k+1, 1_${ik}$ ), ldf,a( rk, 1_${ik}$ ), &
                           lda, one, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=dp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**t.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-rk, n-kb, kb, -one,a( rk+1, 1_${ik}$ ), &
                        lda, f( kb+1, 1_${ik}$ ), ldf, one,a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           40 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_dnrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 40
           end if
           return
     end subroutine stdlib${ii}$_dlaqps

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! DLAQPS: computes a step of QR factorization with column pivoting
     !! of a real M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*), auxv(*), f(ldf,*), vn1(*), vn2(*)
           real(${rk}$), intent(out) :: tau(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(${rk}$) :: akk, temp, temp2, tol3z
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_${ri}$lamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_i${ri}$amax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$swap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**t.
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', m-rk+1, k-1, -one, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ), &
                           ldf, one, a( rk, k ), 1_${ik}$ )
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_${ri}$larfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_${ri}$larfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = one
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**t*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), lda, a( rk, &
                           k ), 1_${ik}$, zero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = zero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**t
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemv( 'TRANSPOSE', m-rk+1, k-1, -tau( k ), a( rk, 1_${ik}$ ),lda, a( rk, k &
                           ), 1_${ik}$, zero, auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n, k-1, one, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, one,&
                            f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**t.
              if( k<n ) then
                 call stdlib${ii}$_${ri}$gemv( 'NO TRANSPOSE', n-k, k, -one, f( k+1, 1_${ik}$ ), ldf,a( rk, 1_${ik}$ ), &
                           lda, one, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=${rk}$)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**t.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'TRANSPOSE', m-rk, n-kb, kb, -one,a( rk+1, 1_${ik}$ ), &
                        lda, f( kb+1, 1_${ik}$ ), ldf, one,a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           40 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_${ri}$nrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_dnrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_${ri}$lamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 40
           end if
           return
     end subroutine stdlib${ii}$_${ri}$laqps

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! CLAQPS computes a step of QR factorization with column pivoting
     !! of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: vn1(*), vn2(*)
           complex(sp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*)
           complex(sp), intent(out) :: tau(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(sp) :: temp, temp2, tol3z
           complex(sp) :: akk
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_slamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_isamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_cswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_cswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**h.
              if( k>1_${ik}$ ) then
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
                 call stdlib${ii}$_cgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ),&
                            ldf, cone, a( rk, k ), 1_${ik}$ )
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_clarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_clarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = cone
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**h*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), &
                           lda, a( rk, k ), 1_${ik}$, czero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = czero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**h
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_cgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1_${ik}$ ), lda,&
                            a( rk, k ), 1_${ik}$, czero,auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_cgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, &
                           cone, f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h.
              if( k<n ) then
                 call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', 1_${ik}$, n-k,k, -cone, a( rk,&
                            1_${ik}$ ), lda, f( k+1, 1_${ik}$ ), ldf,cone, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=sp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**h.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-rk, n-kb,kb, -cone, a( &
                        rk+1, 1_${ik}$ ), lda, f( kb+1, 1_${ik}$ ), ldf,cone, a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           60 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_scnrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 60
           end if
           return
     end subroutine stdlib${ii}$_claqps

     pure module subroutine stdlib${ii}$_zlaqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! ZLAQPS computes a step of QR factorization with column pivoting
     !! of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: vn1(*), vn2(*)
           complex(dp), intent(inout) :: a(lda,*), auxv(*), f(ldf,*)
           complex(dp), intent(out) :: tau(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(dp) :: temp, temp2, tol3z
           complex(dp) :: akk
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_dlamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_idamax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_zswap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_zswap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**h.
              if( k>1_${ik}$ ) then
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
                 call stdlib${ii}$_zgemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ),&
                            ldf, cone, a( rk, k ), 1_${ik}$ )
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_zlarfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_zlarfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = cone
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**h*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), &
                           lda, a( rk, k ), 1_${ik}$, czero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = czero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**h
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_zgemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1_${ik}$ ), lda,&
                            a( rk, k ), 1_${ik}$, czero,auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_zgemv( 'NO TRANSPOSE', n, k-1, cone, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, &
                           cone, f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h.
              if( k<n ) then
                 call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', 1_${ik}$, n-k,k, -cone, a( rk,&
                            1_${ik}$ ), lda, f( k+1, 1_${ik}$ ), ldf,cone, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=dp)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**h.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-rk, n-kb,kb, -cone, a( &
                        rk+1, 1_${ik}$ ), lda, f( kb+1, 1_${ik}$ ), ldf,cone, a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           60 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_dznrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_snrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_dlamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 60
           end if
           return
     end subroutine stdlib${ii}$_zlaqps

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqps( m, n, offset, nb, kb, a, lda, jpvt, tau, vn1,vn2, auxv, f, &
     !! ZLAQPS: computes a step of QR factorization with column pivoting
     !! of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
     !! NB columns from A starting from the row OFFSET+1, and updates all
     !! of the matrix with Blas-3 xGEMM.
     !! In some cases, due to catastrophic cancellations, it cannot
     !! factorize NB columns.  Hence, the actual number of factorized
     !! columns is returned in KB.
     !! Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
               ldf )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: kb
           integer(${ik}$), intent(in) :: lda, ldf, m, n, nb, offset
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(inout) :: vn1(*), vn2(*)
           complex(${ck}$), intent(inout) :: a(lda,*), auxv(*), f(ldf,*)
           complex(${ck}$), intent(out) :: tau(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           integer(${ik}$) :: itemp, j, k, lastrk, lsticc, pvt, rk
           real(${ck}$) :: temp, temp2, tol3z
           complex(${ck}$) :: akk
           ! Intrinsic Functions 
           ! Executable Statements 
           lastrk = min( m, n+offset )
           lsticc = 0_${ik}$
           k = 0_${ik}$
           tol3z = sqrt(stdlib${ii}$_${c2ri(ci)}$lamch('EPSILON'))
           ! beginning of while loop.
           10 continue
           if( ( k<nb ) .and. ( lsticc==0_${ik}$ ) ) then
              k = k + 1_${ik}$
              rk = offset + k
              ! determine ith pivot column and swap if necessary
              pvt = ( k-1 ) + stdlib${ii}$_i${c2ri(ci)}$amax( n-k+1, vn1( k ), 1_${ik}$ )
              if( pvt/=k ) then
                 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, pvt ), 1_${ik}$, a( 1_${ik}$, k ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$swap( k-1, f( pvt, 1_${ik}$ ), ldf, f( k, 1_${ik}$ ), ldf )
                 itemp = jpvt( pvt )
                 jpvt( pvt ) = jpvt( k )
                 jpvt( k ) = itemp
                 vn1( pvt ) = vn1( k )
                 vn2( pvt ) = vn2( k )
              end if
              ! apply previous householder reflectors to column k:
              ! a(rk:m,k) := a(rk:m,k) - a(rk:m,1:k-1)*f(k,1:k-1)**h.
              if( k>1_${ik}$ ) then
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
                 call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', m-rk+1, k-1, -cone, a( rk, 1_${ik}$ ),lda, f( k, 1_${ik}$ ),&
                            ldf, cone, a( rk, k ), 1_${ik}$ )
                 do j = 1, k - 1
                    f( k, j ) = conjg( f( k, j ) )
                 end do
              end if
              ! generate elementary reflector h(k).
              if( rk<m ) then
                 call stdlib${ii}$_${ci}$larfg( m-rk+1, a( rk, k ), a( rk+1, k ), 1_${ik}$, tau( k ) )
              else
                 call stdlib${ii}$_${ci}$larfg( 1_${ik}$, a( rk, k ), a( rk, k ), 1_${ik}$, tau( k ) )
              end if
              akk = a( rk, k )
              a( rk, k ) = cone
              ! compute kth column of f:
              ! compute  f(k+1:n,k) := tau(k)*a(rk:m,k+1:n)**h*a(rk:m,k).
              if( k<n ) then
                 call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', m-rk+1, n-k, tau( k ),a( rk, k+1 ), &
                           lda, a( rk, k ), 1_${ik}$, czero,f( k+1, k ), 1_${ik}$ )
              end if
              ! padding f(1:k,k) with zeros.
              do j = 1, k
                 f( j, k ) = czero
              end do
              ! incremental updating of f:
              ! f(1:n,k) := f(1:n,k) - tau(k)*f(1:n,1:k-1)*a(rk:m,1:k-1)**h
                          ! *a(rk:m,k).
              if( k>1_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemv( 'CONJUGATE TRANSPOSE', m-rk+1, k-1, -tau( k ),a( rk, 1_${ik}$ ), lda,&
                            a( rk, k ), 1_${ik}$, czero,auxv( 1_${ik}$ ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$gemv( 'NO TRANSPOSE', n, k-1, cone, f( 1_${ik}$, 1_${ik}$ ), ldf,auxv( 1_${ik}$ ), 1_${ik}$, &
                           cone, f( 1_${ik}$, k ), 1_${ik}$ )
              end if
              ! update the current row of a:
              ! a(rk,k+1:n) := a(rk,k+1:n) - a(rk,1:k)*f(k+1:n,1:k)**h.
              if( k<n ) then
                 call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', 1_${ik}$, n-k,k, -cone, a( rk,&
                            1_${ik}$ ), lda, f( k+1, 1_${ik}$ ), ldf,cone, a( rk, k+1 ), lda )
              end if
              ! update partial column norms.
              if( rk<lastrk ) then
                 do j = k + 1, n
                    if( vn1( j )/=zero ) then
                       ! note: the following 4 lines follow from the analysis in
                       ! lapack working note 176.
                       temp = abs( a( rk, j ) ) / vn1( j )
                       temp = max( zero, ( one+temp )*( one-temp ) )
                       temp2 = temp*( vn1( j ) / vn2( j ) )**2_${ik}$
                       if( temp2 <= tol3z ) then
                          vn2( j ) = real( lsticc,KIND=${ck}$)
                          lsticc = j
                       else
                          vn1( j ) = vn1( j )*sqrt( temp )
                       end if
                    end if
                 end do
              end if
              a( rk, k ) = akk
              ! end of while loop.
              go to 10
           end if
           kb = k
           rk = offset + kb
           ! apply the block reflector to the rest of the matrix:
           ! a(offset+kb+1:m,kb+1:n) := a(offset+kb+1:m,kb+1:n) -
                               ! a(offset+kb+1:m,1:kb)*f(kb+1:n,1:kb)**h.
           if( kb<min( n, m-offset ) ) then
              call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-rk, n-kb,kb, -cone, a( &
                        rk+1, 1_${ik}$ ), lda, f( kb+1, 1_${ik}$ ), ldf,cone, a( rk+1, kb+1 ), lda )
           end if
           ! recomputation of difficult columns.
           60 continue
           if( lsticc>0_${ik}$ ) then
              itemp = nint( vn2( lsticc ),KIND=${ik}$)
              vn1( lsticc ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m-rk, a( rk+1, lsticc ), 1_${ik}$ )
              ! note: the computation of vn1( lsticc ) relies on the fact that
              ! stdlib${ii}$_dnrm2 does not fail on vectors with norm below the value of
              ! sqrt(stdlib${ii}$_${c2ri(ci)}$lamch('s'))
              vn2( lsticc ) = vn1( lsticc )
              lsticc = itemp
              go to 60
           end if
           return
     end subroutine stdlib${ii}$_${ci}$laqps

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! SLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a real M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_sgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_sgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_stpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_stpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_slatsqr

     pure module subroutine stdlib${ii}$_dlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! DLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a real M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_dgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_dgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_dtpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_dtpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_dlatsqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$latsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! DLATSQR: computes a blocked Tall-Skinny QR factorization of
     !! a real M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_${ri}$geqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_${ri}$geqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_${ri}$tpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_${ri}$tpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_${ri}$latsqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! CLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a complex M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_cgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_cgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_ctpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$,ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_ctpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_clatsqr

     pure module subroutine stdlib${ii}$_zlatsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! ZLATSQR computes a blocked Tall-Skinny QR factorization of
     !! a complex M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_zgeqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_zgeqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_ztpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_ztpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$,ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_zlatsqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$latsqr( m, n, mb, nb, a, lda, t, ldt, work,lwork, info)
     !! ZLATSQR: computes a blocked Tall-Skinny QR factorization of
     !! a complex M-by-N matrix A for M >= N:
     !! A = Q * ( R ),
     !! ( 0 )
     !! where:
     !! Q is a M-by-M orthogonal matrix, stored on exit in an implicit
     !! form in the elements below the diagonal of the array A and in
     !! the elements of the array T;
     !! R is an upper-triangular N-by-N matrix, stored on exit in
     !! the elements on and above the diagonal of the array A.
     !! 0 is a (M-N)-by-N zero matrix, and is not stored.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd. --
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, mb, nb, ldt, lwork
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(out) :: work(*), t(ldt,*)
        ! =====================================================================
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, ii, kk, ctr
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
             info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
             info = -2_${ik}$
           else if( mb<1_${ik}$ ) then
             info = -3_${ik}$
           else if( nb<1_${ik}$ .or. ( nb>n .and. n>0_${ik}$ )) then
             info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
             info = -6_${ik}$
           else if( ldt<nb ) then
             info = -8_${ik}$
           else if( lwork<(n*nb) .and. (.not.lquery) ) then
             info = -10_${ik}$
           end if
           if( info==0_${ik}$)  then
             work(1_${ik}$) = nb*n
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLATSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n)==0_${ik}$ ) then
               return
           end if
           ! the qr decomposition
            if ((mb<=n).or.(mb>=m)) then
              call stdlib${ii}$_${ci}$geqrt( m, n, nb, a, lda, t, ldt, work, info)
              return
            end if
            kk = mod((m-n),(mb-n))
            ii=m-kk+1
            ! compute the qr factorization of the first block a(1:mb,1:n)
            call stdlib${ii}$_${ci}$geqrt( mb, n, nb, a(1_${ik}$,1_${ik}$), lda, t, ldt, work, info )
            ctr = 1_${ik}$
            do i = mb+1, ii-mb+n ,  (mb-n)
            ! compute the qr factorization of the current block a(i:i+mb-n,1:n)
              call stdlib${ii}$_${ci}$tpqrt( mb-n, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( i, 1_${ik}$ ), lda,t(1_${ik}$, ctr * n + 1_${ik}$),&
                        ldt, work, info )
              ctr = ctr + 1_${ik}$
            end do
            ! compute the qr factorization of the last block a(ii:m,1:n)
            if (ii<=m) then
              call stdlib${ii}$_${ci}$tpqrt( kk, n, 0_${ik}$, nb, a(1_${ik}$,1_${ik}$), lda, a( ii, 1_${ik}$ ), lda,t(1_${ik}$,ctr * n + 1_${ik}$), &
                        ldt,work, info )
            end if
           work( 1_${ik}$ ) = n*nb
           return
     end subroutine stdlib${ii}$_${ci}$latsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! CUNGTSQR generates an M-by-N complex matrix Q_out with orthonormal
     !! columns, which are the first N columns of a product of comlpex unitary
     !! matrices of order M which are returned by CLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for CLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: t(ldt,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_clamtsqr. see the documentation for stdlib${ii}$_clamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_clamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_clamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_clamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_claset( 'F', m, n, czero, cone, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_clamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_ccopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cungtsqr

     pure module subroutine stdlib${ii}$_zungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! ZUNGTSQR generates an M-by-N complex matrix Q_out with orthonormal
     !! columns, which are the first N columns of a product of comlpex unitary
     !! matrices of order M which are returned by ZLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for ZLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: t(ldt,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_zlamtsqr. see the documentation for stdlib${ii}$_zlamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_zlamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_zlamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_zlamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_zlaset( 'F', m, n, czero, cone, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_zlamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_zcopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zungtsqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! ZUNGTSQR: generates an M-by-N complex matrix Q_out with orthonormal
     !! columns, which are the first N columns of a product of comlpex unitary
     !! matrices of order M which are returned by ZLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for ZLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: t(ldt,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_${ci}$lamtsqr. see the documentation for stdlib${ii}$_${ci}$lamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_${ci}$lamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_${ci}$lamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_${ci}$lamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, cone, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_${ci}$lamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ci}$copy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$ungtsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_cungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! CUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
     !! orthonormal columns from the output of CLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by CLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of CLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine CLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which CLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(in) :: t(ldt,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           complex(sp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CUNGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_claset('U', m, n, czero, cone, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_clarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_clarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_clarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cungtsqr_row

     pure module subroutine stdlib${ii}$_zungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! ZUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
     !! orthonormal columns from the output of ZLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by ZLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of ZLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine ZLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which ZLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*)
           complex(dp), intent(in) :: t(ldt,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           complex(dp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_zlaset('U', m, n, czero, cone, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_zlarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_zlarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_zlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zungtsqr_row

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$ungtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! ZUNGTSQR_ROW: generates an M-by-N complex matrix Q_out with
     !! orthonormal columns from the output of ZLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by ZLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of ZLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine ZLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which ZLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*)
           complex(${ck}$), intent(in) :: t(ldt,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           complex(${ck}$) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZUNGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_${ci}$laset('U', m, n, czero, cone, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_${ci}$larfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_dlarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_${ci}$larfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_${ci}$larfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = cmplx( lworkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$ungtsqr_row

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! SORGTSQR generates an M-by-N real matrix Q_out with orthonormal columns,
     !! which are the first N columns of a product of real orthogonal
     !! matrices of order M which are returned by SLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for SLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: t(ldt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_slamtsqr. see the documentation for stdlib${ii}$_slamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_slamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_slamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_slamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_slaset( 'F', m, n, zero, one, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_slamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_scopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sorgtsqr

     pure module subroutine stdlib${ii}$_dorgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! DORGTSQR generates an M-by-N real matrix Q_out with orthonormal columns,
     !! which are the first N columns of a product of real orthogonal
     !! matrices of order M which are returned by DLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for DLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: t(ldt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_dlamtsqr. see the documentation for stdlib${ii}$_dlamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_dlamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_dlamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_dlamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_dlaset( 'F', m, n, zero, one, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_dlamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_dcopy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dorgtsqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgtsqr( m, n, mb, nb, a, lda, t, ldt, work, lwork,info )
     !! DORGTSQR: generates an M-by-N real matrix Q_out with orthonormal columns,
     !! which are the first N columns of a product of real orthogonal
     !! matrices of order M which are returned by DLATSQR
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! See the documentation for DLATSQR.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iinfo, ldc, lworkopt, lc, lw, nblocal, j
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           lquery  = lwork==-1_${ik}$
           info = 0_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array c(ldc, n) and work(lwork)
              ! in the call to stdlib${ii}$_${ri}$lamtsqr. see the documentation for stdlib${ii}$_${ri}$lamtsqr.
              if( lwork<2_${ik}$ .and. (.not.lquery) ) then
                 info = -10_${ik}$
              else
                 ! set block size for column blocks
                 nblocal = min( nb, n )
                 ! lwork = -1, then set the size for the array c(ldc,n)
                 ! in stdlib${ii}$_${ri}$lamtsqr call and set the optimal size of the work array
                 ! work(lwork) in stdlib${ii}$_${ri}$lamtsqr call.
                 ldc = m
                 lc = ldc*n
                 lw = n * nblocal
                 lworkopt = lc+lw
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -10_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! (1) form explicitly the tall-skinny m-by-n left submatrix q1_in
           ! of m-by-m orthogonal matrix q_in, which is implicitly stored in
           ! the subdiagonal part of input array a and in the input array t.
           ! perform by the following operation using the routine stdlib${ii}$_${ri}$lamtsqr.
               ! q1_in = q_in * ( i ), where i is a n-by-n identity matrix,
                              ! ( 0 )        0 is a (m-n)-by-n zero matrix.
           ! (1a) form m-by-n matrix in the array work(1:ldc*n) with ones
           ! on the diagonal and zeros elsewhere.
           call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, one, work, ldc )
           ! (1b)  on input, work(1:ldc*n) stores ( i );
                                                ! ( 0 )
                 ! on output, work(1:ldc*n) stores q1_in.
           call stdlib${ii}$_${ri}$lamtsqr( 'L', 'N', m, n, n, mb, nblocal, a, lda, t, ldt,work, ldc, work( &
                     lc+1 ), lw, iinfo )
           ! (2) copy the result from the part of the work array (1:m,1:n)
           ! with the leading dimension ldc that starts at work(1) into
           ! the output array a(1:m,1:n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ri}$copy( m, work( (j-1)*ldc + 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, j ), 1_${ik}$ )
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$orgtsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! SORGTSQR_ROW generates an M-by-N real matrix Q_out with
     !! orthonormal columns from the output of SLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by SLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of SLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine SLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which SLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(in) :: t(ldt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           real(sp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SORGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_slaset('U', m, n, zero, one, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_slarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_slarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_slarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sorgtsqr_row

     pure module subroutine stdlib${ii}$_dorgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! DORGTSQR_ROW generates an M-by-N real matrix Q_out with
     !! orthonormal columns from the output of DLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by DLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of DLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine DLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which DLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(in) :: t(ldt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           real(dp) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_dlaset('U', m, n, zero, one, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_dlarfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_dlarfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_dlarfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dorgtsqr_row

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$orgtsqr_row( m, n, mb, nb, a, lda, t, ldt, work,lwork, info )
     !! DORGTSQR_ROW: generates an M-by-N real matrix Q_out with
     !! orthonormal columns from the output of DLATSQR. These N orthonormal
     !! columns are the first N columns of a product of complex unitary
     !! matrices Q(k)_in of order M, which are returned by DLATSQR in
     !! a special format.
     !! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     !! The input matrices Q(k)_in are stored in row and column blocks in A.
     !! See the documentation of DLATSQR for more details on the format of
     !! Q(k)_in, where each Q(k)_in is represented by block Householder
     !! transformations. This routine calls an auxiliary routine DLARFB_GETT,
     !! where the computation is performed on each individual block. The
     !! algorithm first sweeps NB-sized column blocks from the right to left
     !! starting in the bottom row block and continues to the top row block
     !! (hence _ROW in the routine name). This sweep is in reverse order of
     !! the order in which DLATSQR generates the output blocks.
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, mb, nb
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(in) :: t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, &
                     num_all_row_blocks, jb_t, ib, imb, kb, kb_last, knb, mb1
           ! Local Arrays 
           real(${rk}$) :: dummy(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb<=n ) then
              info = -3_${ik}$
           else if( nb<1_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldt<max( 1_${ik}$, min( nb, n ) ) ) then
              info = -8_${ik}$
           else if( lwork<1_${ik}$ .and. .not.lquery ) then
              info = -10_${ik}$
           end if
           nblocal = min( nb, n )
           ! determine the workspace size.
           if( info==0_${ik}$ ) then
              lworkopt = nblocal * max( nblocal, ( n - nblocal ) )
           end if
           ! handle error in the input parameters and handle the workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DORGTSQR_ROW', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib${ii}$_${ri}$laset('U', m, n, zero, one, a, lda )
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ( ( n-1 ) / nblocal ) * nblocal + 1_${ik}$
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if ( mb<m ) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1_${ik}$
              itmp = ( m - mb - 1_${ik}$ ) / mb2
              ib_bottom = itmp * mb2 + mb + 1_${ik}$
              num_all_row_blocks = itmp + 2_${ik}$
              jb_t = num_all_row_blocks * n + 1_${ik}$
              do ib = ib_bottom, mb+1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min( m_plus_one - ib, mb2 )
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min( nblocal, n - kb + 1_${ik}$ )
                    call stdlib${ii}$_${ri}$larfb_gett( 'I', imb, n-kb+1, knb,t( 1_${ik}$, jb_t+kb-1 ), ldt, a( kb, &
                              kb ), lda,a( ib, kb ), lda, work, knb )
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min( mb, m )
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min( nblocal, n - kb + 1_${ik}$ )
              if( mb1-kb-knb+1==0_${ik}$ ) then
                 ! in stdlib${ii}$_dlarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib${ii}$_${ri}$larfb_gett( 'N', 0_${ik}$, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, kb ), lda,&
                           dummy( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, work, knb )
              else
                 call stdlib${ii}$_${ri}$larfb_gett( 'N', mb1-kb-knb+1, n-kb+1, knb,t( 1_${ik}$, kb ), ldt, a( kb, &
                           kb ), lda,a( kb+knb, kb), lda, work, knb )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$orgtsqr_row

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! SLARFB_GETT applies a real Householder block reflector H from the
     !! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(in) :: t(ldt,*)
           real(sp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_scopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_strmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1_${ik}$, k+1 ), ldb, one, work, &
                           ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1_${ik}$, k+&
                           1_${ik}$ ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_strmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_scopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = zero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_strmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_strmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_strmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_strmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_slarfb_gett

     pure module subroutine stdlib${ii}$_dlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! DLARFB_GETT applies a real Householder block reflector H from the
     !! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(in) :: t(ldt,*)
           real(dp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_dcopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1_${ik}$, k+1 ), ldb, one, work, &
                           ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1_${ik}$, k+&
                           1_${ik}$ ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_dcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = zero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_dtrmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_dtrmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_dtrmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_dtrmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_dlarfb_gett

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$larfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! DLARFB_GETT: applies a real Householder block reflector H from the
     !! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(in) :: t(ldt,*)
           real(${rk}$), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_${ri}$copy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', k, n-k, m, one, b, ldb,b( 1_${ik}$, k+1 ), ldb, one, work, &
                           ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', k, n-k, one, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n-k, k, -one, b, ldb,work, ldwork, one, b( 1_${ik}$, k+&
                           1_${ik}$ ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'U', k, n-k, one, a, lda,work, ldwork )
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_${ri}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = zero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'T', 'U', k, k, one, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_${ri}$trmm( 'L', 'U', 'N', 'N', k, k, one, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_${ri}$trmm( 'R', 'U', 'N', 'N', m, k, -one, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_${ri}$trmm( 'L', 'L', 'N', 'U', k, k, one, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ri}$larfb_gett

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! CLARFB_GETT applies a complex Householder block reflector H from the
     !! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(in) :: t(ldt,*)
           complex(sp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_ccopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1_${ik}$, k+1 ), ldb, cone, &
                           work, ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1_${ik}$, &
                           k+1 ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_ccopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = czero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_ctrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_ctrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_ctrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_ctrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_clarfb_gett

     pure module subroutine stdlib${ii}$_zlarfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! ZLARFB_GETT applies a complex Householder block reflector H from the
     !! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(in) :: t(ldt,*)
           complex(dp), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_zcopy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1_${ik}$, k+1 ), ldb, cone, &
                           work, ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1_${ik}$, &
                           k+1 ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_zcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = czero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_ztrmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_ztrmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_ztrmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_ztrmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_zlarfb_gett

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$larfb_gett( ident, m, n, k, t, ldt, a, lda, b, ldb,work, ldwork )
     !! ZLARFB_GETT: applies a complex Householder block reflector H from the
     !! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     !! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     !! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     !! in the array B. The block reflector H is stored in a compact
     !! WY-representation, where the elementary reflectors are in the
     !! arrays A, B and T. See Further Details section.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: ident
           integer(${ik}$), intent(in) :: k, lda, ldb, ldt, ldwork, m, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(in) :: t(ldt,*)
           complex(${ck}$), intent(out) :: work(ldwork,*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lnotident
           integer(${ik}$) :: i, j
           ! Executable Statements 
           ! quick return if possible
           if( m<0 .or. n<=0 .or. k==0 .or. k>n )return
           lnotident = .not.stdlib_lsame( ident, 'I' )
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if( n>k ) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n-k
                 call stdlib${ii}$_${ci}$copy( k, a( 1_${ik}$, k+j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
              end do
              if( lnotident ) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', k, n-k, m, cone, b, ldb,b( 1_${ik}$, k+1 ), ldb, cone, &
                           work, ldwork )
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', k, n-k, cone, t, ldt,work, ldwork )
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if( m>0_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n-k, k, -cone, b, ldb,work, ldwork, cone, b( 1_${ik}$, &
                           k+1 ), ldb )
              end if
              if( lnotident ) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'U', k, n-k, cone, a, lda,work, ldwork )
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n-k
                 do i = 1, k
                    a( i, k+j ) = a( i, k+j ) - work( i, j )
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib${ii}$_${ci}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work( i, j ) = czero
              end do
           end do
           if( lnotident ) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'C', 'U', k, k, cone, a, lda,work, ldwork )
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib${ii}$_${ci}$trmm( 'L', 'U', 'N', 'N', k, k, cone, t, ldt,work, ldwork )
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if( m>0_${ik}$ ) then
              call stdlib${ii}$_${ci}$trmm( 'R', 'U', 'N', 'N', m, k, -cone, work, ldwork,b, ldb )
           end if
           if( lnotident ) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib${ii}$_${ci}$trmm( 'L', 'L', 'N', 'U', k, k, cone, a, lda,work, ldwork )
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a( i, j ) = - work( i, j )
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a( i, j ) = a( i, j ) - work( i, j )
              end do
           end do
           return
     end subroutine stdlib${ii}$_${ci}$larfb_gett

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! SLAMTSQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (SLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           real(sp), intent(in) :: a(lda,*), t(ldt,*)
           real(sp), intent(out) :: work(*)
           real(sp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = mb * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_sgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_stpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt , c(1_${ik}$,&
                          1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_stpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt,&
                           c(1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_sgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_sgemqrt('L','T',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_stpmqrt('L','T',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_stpmqrt('L','T',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_stpmqrt('R','T',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_stpmqrt('R','T',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_sgemqrt('R','T',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_sgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_stpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_stpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_slamtsqr

     pure module subroutine stdlib${ii}$_dlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! DLAMTSQR overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           real(dp), intent(in) :: a(lda,*), t(ldt,*)
           real(dp), intent(out) :: work(*)
           real(dp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = mb * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_dgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_dtpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt , c(1_${ik}$,&
                          1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_dtpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,&
                          1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_dgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_dgemqrt('L','T',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_dtpmqrt('L','T',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_dtpmqrt('L','T',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_dtpmqrt('R','T',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_dtpmqrt('R','T',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_dgemqrt('R','T',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_dgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_dtpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_dtpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_dlamtsqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! DLAMTSQR: overwrites the general real M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'T':      Q**T * C       C * Q**T
     !! where Q is a real orthogonal matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (DLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           real(${rk}$), intent(in) :: a(lda,*), t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
           real(${rk}$), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'T' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = mb * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_${ri}$gemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_${ri}$tpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt , c(1_${ik}$,&
                          1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_${ri}$tpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,&
                          1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_${ri}$gemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ri}$gemqrt('L','T',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_${ri}$tpmqrt('L','T',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ri}$tpmqrt('L','T',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_${ri}$tpmqrt('R','T',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_${ri}$tpmqrt('R','T',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_${ri}$gemqrt('R','T',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ri}$gemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_${ri}$tpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ri}$tpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_${ri}$lamtsqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! CLAMTSQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (CLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           complex(sp), intent(in) :: a(lda,*), t(ldt,*)
           complex(sp), intent(out) :: work(*)
           complex(sp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = m * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_cgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_ctpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr*k+1),ldt , c(&
                          1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_ctpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,&
                          1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_cgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_cgemqrt('L','C',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_ctpmqrt('L','C',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr*k+1),ldt, c(1_${ik}$,&
                         1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ctpmqrt('L','C',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(1_${ik}$,1_${ik}$)&
                         , ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_ctpmqrt('R','C',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_ctpmqrt('R','C',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1), ldt, c(&
                           1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_cgemqrt('R','C',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_cgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_ctpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,1_${ik}$)&
                         , ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ctpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr*k+1),ldt, c(1_${ik}$,1_${ik}$)&
                         , ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_clamtsqr

     pure module subroutine stdlib${ii}$_zlamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! ZLAMTSQR overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           complex(dp), intent(in) :: a(lda,*), t(ldt,*)
           complex(dp), intent(out) :: work(*)
           complex(dp), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = m * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_zgemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_ztpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt ,&
                           c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_ztpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, &
                          c(1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_zgemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_zgemqrt('L','C',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_ztpmqrt('L','C',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ztpmqrt('L','C',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_ztpmqrt('R','C',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt,&
                            c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_ztpmqrt('R','C',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_zgemqrt('R','C',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_zgemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_ztpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_ztpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_zlamtsqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$lamtsqr( side, trans, m, n, k, mb, nb, a, lda, t,ldt, c, ldc, work, &
     !! ZLAMTSQR: overwrites the general complex M-by-N matrix C with
     !! SIDE = 'L'     SIDE = 'R'
     !! TRANS = 'N':      Q * C          C * Q
     !! TRANS = 'C':      Q**H * C       C * Q**H
     !! where Q is a complex unitary matrix defined as the product
     !! of blocked elementary reflectors computed by tall skinny
     !! QR factorization (ZLATSQR)
               lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: side, trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, m, n, k, mb, nb, ldt, lwork, ldc
           ! Array Arguments 
           complex(${ck}$), intent(in) :: a(lda,*), t(ldt,*)
           complex(${ck}$), intent(out) :: work(*)
           complex(${ck}$), intent(inout) :: c(ldc,*)
       ! =====================================================================
           ! Local Scalars 
           logical(lk) :: left, right, tran, notran, lquery
           integer(${ik}$) :: i, ii, kk, lw, ctr, q
           ! External Subroutines 
           ! Executable Statements 
           ! test the input arguments
           lquery  = lwork<0_${ik}$
           notran  = stdlib_lsame( trans, 'N' )
           tran    = stdlib_lsame( trans, 'C' )
           left    = stdlib_lsame( side, 'L' )
           right   = stdlib_lsame( side, 'R' )
           if (left) then
             lw = n * nb
             q = m
           else
             lw = m * nb
             q = n
           end if
           info = 0_${ik}$
           if( .not.left .and. .not.right ) then
              info = -1_${ik}$
           else if( .not.tran .and. .not.notran ) then
              info = -2_${ik}$
           else if( m<k ) then
             info = -3_${ik}$
           else if( n<0_${ik}$ ) then
             info = -4_${ik}$
           else if( k<0_${ik}$ ) then
             info = -5_${ik}$
           else if( k<nb .or. nb<1_${ik}$ ) then
             info = -7_${ik}$
           else if( lda<max( 1_${ik}$, q ) ) then
             info = -9_${ik}$
           else if( ldt<max( 1_${ik}$, nb) ) then
             info = -11_${ik}$
           else if( ldc<max( 1_${ik}$, m ) ) then
              info = -13_${ik}$
           else if(( lwork<max(1_${ik}$,lw)).and.(.not.lquery)) then
             info = -15_${ik}$
           end if
           ! determine the block size if it is tall skinny or short and wide
           if( info==0_${ik}$)  then
               work(1_${ik}$) = lw
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZLAMTSQR', -info )
             return
           else if (lquery) then
            return
           end if
           ! quick return if possible
           if( min(m,n,k)==0_${ik}$ ) then
             return
           end if
           if((mb<=k).or.(mb>=max(m,n,k))) then
             call stdlib${ii}$_${ci}$gemqrt( side, trans, m, n, k, nb, a, lda,t, ldt, c, ldc, work, info)
                       
             return
            end if
           if(left.and.notran) then
               ! multiply q to the last block of c
              kk = mod((m-k),(mb-k))
              ctr = (m-k)/(mb-k)
              if (kk>0_${ik}$) then
                ii=m-kk+1
                call stdlib${ii}$_${ci}$tpmqrt('L','N',kk , n, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt ,&
                           c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              else
                ii=m+1
              end if
              do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
                ctr = ctr - 1_${ik}$
                call stdlib${ii}$_${ci}$tpmqrt('L','N',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, &
                          c(1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
              end do
               ! multiply q to the first block of c (1:mb,1:n)
              call stdlib${ii}$_${ci}$gemqrt('L','N',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
           else if (left.and.tran) then
               ! multiply q to the first block of c
              kk = mod((m-k),(mb-k))
              ii=m-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ci}$gemqrt('L','C',mb , n, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (i:i+mb,1:n)
               call stdlib${ii}$_${ci}$tpmqrt('L','C',mb-k , n, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(i,1_${ik}$), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=m) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ci}$tpmqrt('L','C',kk , n, k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(ii,1_${ik}$), ldc, work, info )
              end if
           else if(right.and.tran) then
               ! multiply q to the last block of c
               kk = mod((n-k),(mb-k))
               ctr = (n-k)/(mb-k)
               if (kk>0_${ik}$) then
                 ii=n-kk+1
                 call stdlib${ii}$_${ci}$tpmqrt('R','C',m , kk, k, 0_${ik}$, nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$), ldt,&
                            c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
               else
                 ii=n+1
               end if
               do i=ii-(mb-k),mb+1,-(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
                 ctr = ctr - 1_${ik}$
                 call stdlib${ii}$_${ci}$tpmqrt('R','C',m , mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$), &
                           ldt, c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               end do
               ! multiply q to the first block of c (1:m,1:mb)
               call stdlib${ii}$_${ci}$gemqrt('R','C',m , mb, k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                         info )
           else if (right.and.notran) then
               ! multiply q to the first block of c
              kk = mod((n-k),(mb-k))
              ii=n-kk+1
              ctr = 1_${ik}$
              call stdlib${ii}$_${ci}$gemqrt('R','N', m, mb , k, nb, a(1_${ik}$,1_${ik}$), lda, t,ldt ,c(1_${ik}$,1_${ik}$), ldc, work, &
                        info )
              do i=mb+1,ii-mb+k,(mb-k)
               ! multiply q to the current block of c (1:m,i:i+mb)
               call stdlib${ii}$_${ci}$tpmqrt('R','N', m, mb-k, k, 0_${ik}$,nb, a(i,1_${ik}$), lda,t(1_${ik}$, ctr * k + 1_${ik}$),ldt, &
                         c(1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,i), ldc, work, info )
               ctr = ctr + 1_${ik}$
              end do
              if(ii<=n) then
               ! multiply q to the last block of c
               call stdlib${ii}$_${ci}$tpmqrt('R','N', m, kk , k, 0_${ik}$,nb, a(ii,1_${ik}$), lda,t(1_${ik}$,ctr * k + 1_${ik}$),ldt, c(&
                         1_${ik}$,1_${ik}$), ldc,c(1_${ik}$,ii), ldc, work, info )
              end if
           end if
           work(1_${ik}$) = lw
           return
     end subroutine stdlib${ii}$_${ci}$lamtsqr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_sgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! SGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a complex M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in SGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of SGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_slatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_sorgtsqr_row;
              ! d) diagonal d for stdlib${ii}$_sorhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=sp) / real( mb1 - n,&
                           KIND=sp) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_sorgtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_slatsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_scopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_sorgtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_sorhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_sorhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-one ) then
                 do j = i, n
                    a( i, j ) = -one * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_scopy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_sgetsqrhrt

     pure module subroutine stdlib${ii}$_dgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! DGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a real M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in DGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of DGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_dlatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_dorgtsqr_row;
              ! d) diagonal d for stdlib${ii}$_dorhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=dp) / real( mb1 - n,&
                           KIND=dp) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_dorgtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_dlatsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_dcopy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_dorgtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_dorhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_dorhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-one ) then
                 do j = i, n
                    a( i, j ) = -one * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_dcopy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_dgetsqrhrt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$getsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! DGETSQRHRT: computes a NB2-sized column blocked QR-factorization
     !! of a real M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in DGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of DGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*)
           real(${rk}$), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_${ri}$latsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_${ri}$orgtsqr_row;
              ! d) diagonal d for stdlib${ii}$_${ri}$orhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=${rk}$) / real( mb1 - n,&
                           KIND=${rk}$) ) )
                 ! length and leading dimension of work array to place
                 ! t array in tsqr.
                 lwt = num_all_row_blocks * n * nb1local
                 ldwt = nb1local
                 ! length of tsqr work array
                 lw1 = nb1local * n
                 ! length of stdlib${ii}$_${ri}$orgtsqr_row work array.
                 lw2 = nb1local * max( nb1local, ( n - nb1local ) )
                 lworkopt = max( lwt + lw1, max( lwt+n*n+lw2, lwt+n*n+n ) )
                 if( ( lwork<max( 1_${ik}$, lworkopt ) ).and.(.not.lquery) ) then
                    info = -11_${ik}$
                 end if
              end if
           end if
           ! handle error in the input parameters and return workspace query.
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGETSQRHRT', -info )
              return
           else if ( lquery ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           ! quick return if possible
           if( min( m, n )==0_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
              return
           end if
           nb2local = min( nb2, n )
           ! (1) perform tsqr-factorization of the m-by-n matrix a.
           call stdlib${ii}$_${ri}$latsqr( m, n, mb1, nb1local, a, lda, work, ldwt,work(lwt+1), lw1, iinfo )
                     
           ! (2) copy the factor r_tsqr stored in the upper-triangular part
               ! of a into the square matrix in the work array
               ! work(lwt+1:lwt+n*n) column-by-column.
           do j = 1, n
              call stdlib${ii}$_${ri}$copy( j, a( 1_${ik}$, j ), 1_${ik}$, work( lwt + n*(j-1)+1_${ik}$ ), 1_${ik}$ )
           end do
           ! (3) generate a m-by-n matrix q with orthonormal columns from
           ! the result stored below the diagonal in the array a in place.
           call stdlib${ii}$_${ri}$orgtsqr_row( m, n, mb1, nb1local, a, lda, work, ldwt,work( lwt+n*n+1 ), &
                     lw2, iinfo )
           ! (4) perform the reconstruction of householder vectors from
           ! the matrix q (stored in a) in place.
           call stdlib${ii}$_${ri}$orhr_col( m, n, nb2local, a, lda, t, ldt,work( lwt+n*n+1 ), iinfo )
                     
           ! (5) copy the factor r_tsqr stored in the square matrix in the
           ! work array work(lwt+1:lwt+n*n) into the upper-triangular
           ! part of a.
           ! (6) compute from r_tsqr the factor r_hr corresponding to
           ! the reconstructed householder vectors, i.e. r_hr = s * r_tsqr.
           ! this multiplication by the sign matrix s on the left means
           ! changing the sign of i-th row of the matrix r_tsqr according
           ! to sign of the i-th diagonal element diag(i) of the matrix s.
           ! diag is stored in work( lwt+n*n+1 ) from the stdlib${ii}$_${ri}$orhr_col output.
           ! (5) and (6) can be combined in a single loop, so the rows in a
           ! are accessed only once.
           do i = 1, n
              if( work( lwt+n*n+i )==-one ) then
                 do j = i, n
                    a( i, j ) = -one * work( lwt+n*(j-1)+i )
                 end do
              else
                 call stdlib${ii}$_${ri}$copy( n-i+1, work(lwt+n*(i-1)+i), n, a( i, i ), lda )
              end if
           end do
           work( 1_${ik}$ ) = real( lworkopt,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$getsqrhrt

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_cgetsqrhrt( m, n, mb1, nb1, nb2, a, lda, t, ldt, work,lwork, info )
     !! CGETSQRHRT computes a NB2-sized column blocked QR-factorization
     !! of a complex M-by-N matrix A with M >= N,
     !! A = Q * R.
     !! The routine uses internally a NB1-sized column blocked and MB1-sized
     !! row blocked TSQR-factorization and perfors the reconstruction
     !! of the Householder vectors from the TSQR output. The routine also
     !! converts the R_tsqr factor from the TSQR-factorization output into
     !! the R factor that corresponds to the Householder QR-factorization,
     !! A = Q_tsqr * R_tsqr = Q * R.
     !! The output Q and R factors are stored in the same format as in CGEQRT
     !! (Q is in blocked compact WY-representation). See the documentation
     !! of CGEQRT for more details on the format.
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldt, lwork, m, n, nb1, nb2, mb1
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*)
           complex(sp), intent(out) :: t(ldt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iinfo, j, lw1, lw2, lwt, ldwt, lworkopt, nb1local, nb2local, &
                     num_all_row_blocks
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           lquery  = lwork==-1_${ik}$
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ .or. m<n ) then
              info = -2_${ik}$
           else if( mb1<=n ) then
              info = -3_${ik}$
           else if( nb1<1_${ik}$ ) then
              info = -4_${ik}$
           else if( nb2<1_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -7_${ik}$
           else if( ldt<max( 1_${ik}$,  min( nb2, n ) ) ) then
              info = -9_${ik}$
           else
              ! test the input lwork for the dimension of the array work.
              ! this workspace is used to store array:
              ! a) matrix t and work for stdlib${ii}$_clatsqr;
              ! b) n-by-n upper-triangular factor r_tsqr;
              ! c) matrix t and array work for stdlib${ii}$_cungtsqr_row;
              ! d) diagonal d for stdlib${ii}$_cunhr_col.
              if( lwork<n*n+1 .and. .not.lquery ) then
                 info = -11_${ik}$
              else
                 ! set block size for column blocks
                 nb1local = min( nb1, n )
                 num_all_row_blocks = max( 1_${ik}$,ceiling( real( m - n,KIND=sp) / real( mb1 - n,&
                           KIND=sp) )