stdlib_lapack_lsq.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_lsq
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     module subroutine stdlib${ii}$_sgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info )
     !! SGELSS computes the minimum norm solution to a real linear least
     !! squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_sgeqrf, lwork_sormqr, lwork_sgebrd, lwork_sormbr, lwork_sorgbr, &
                     lwork_sormlq
           real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           real(sp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_sgeqrf
                    call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_sgeqrf=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_sormqr
                    call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_sormqr=dum(1_${ik}$)
                    mm = n
                    maxwrk = max( maxwrk, n + lwork_sgeqrf )
                    maxwrk = max( maxwrk, n + lwork_sormqr )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute workspace needed for stdlib${ii}$_sbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*n )
                    ! compute space needed for stdlib${ii}$_sgebrd
                    call stdlib${ii}$_sgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info &
                              )
                    lwork_sgebrd=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_sormbr
                    call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_sormbr=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_sorgbr
                    call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_sorgbr=dum(1_${ik}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sgebrd )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sormbr )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sorgbr )
                    maxwrk = max( maxwrk, bdspac )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac )
                    maxwrk = max( minwrk, maxwrk )
                 end if
                 if( n>m ) then
                    ! compute workspace needed for stdlib${ii}$_sbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*m )
                    minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_sgebrd
                       call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_sgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sormbr
                       call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_sormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sorgbr
                       call stdlib${ii}$_sorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_sorgbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sormlq
                       call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_sormlq=dum(1_${ik}$)
                       ! compute total workspace needed
                       maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sgebrd )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sormbr )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sorgbr )
                       maxwrk = max( maxwrk, m*m + m + bdspac )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_sormlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_sgebrd
                       call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_sgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sormbr
                       call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_sormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sorgbr
                       call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_sorgbr=dum(1_${ik}$)
                       maxwrk = 3_${ik}$*m + lwork_sgebrd
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sormbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sorgbr )
                       maxwrk = max( maxwrk, bdspac )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_slamch( 'P' )
           sfmin = stdlib${ii}$_slamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
              call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (workspace: need bdspac)
              call stdlib${ii}$_sbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_slacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_sgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_slacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_sgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb)
              call stdlib${ii}$_sorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of l in work(il) and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need m*m+m+bdspac)
              call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,&
                         ldb, work( iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = ie
              ! multiply b by right singular vectors of l in work(il)
              ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_slacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_sgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,&
                               zero, work( iwork ), m )
                    call stdlib${ii}$_slacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_sgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( &
                           iwork ), 1_${ik}$ )
                 call stdlib${ii}$_scopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (workspace: need 4*m, prefer 3*m+m*nb)
              call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need bdspac)
              call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_slacpy( 'F', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_sgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_slacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_sgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_sgelss

     module subroutine stdlib${ii}$_dgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info )
     !! DGELSS computes the minimum norm solution to a real linear least
     !! squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_dgeqrf, lwork_dormqr, lwork_dgebrd, lwork_dormbr, lwork_dorgbr, &
                     lwork_dormlq, lwork_dgelqf
           real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           real(dp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_dgeqrf
                    call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_dgeqrf=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_dormqr
                    call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_dormqr=dum(1_${ik}$)
                    mm = n
                    maxwrk = max( maxwrk, n + lwork_dgeqrf )
                    maxwrk = max( maxwrk, n + lwork_dormqr )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute workspace needed for stdlib${ii}$_dbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*n )
                    ! compute space needed for stdlib${ii}$_dgebrd
                    call stdlib${ii}$_dgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info &
                              )
                    lwork_dgebrd=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_dormbr
                    call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_dormbr=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_dorgbr
                    call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_dorgbr=dum(1_${ik}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dormbr )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr )
                    maxwrk = max( maxwrk, bdspac )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac )
                    maxwrk = max( minwrk, maxwrk )
                 end if
                 if( n>m ) then
                    ! compute workspace needed for stdlib${ii}$_dbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*m )
                    minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_dgelqf
                       call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_dgelqf=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dgebrd
                       call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_dgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dormbr
                       call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_dormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dorgbr
                       call stdlib${ii}$_dorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_dorgbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dormlq
                       call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_dormlq=dum(1_${ik}$)
                       ! compute total workspace needed
                       maxwrk = m + lwork_dgelqf
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dgebrd )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dormbr )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dorgbr )
                       maxwrk = max( maxwrk, m*m + m + bdspac )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_dormlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_dgebrd
                       call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_dgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dormbr
                       call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_dormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dorgbr
                       call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_dorgbr=dum(1_${ik}$)
                       maxwrk = 3_${ik}$*m + lwork_dgebrd
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dormbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr )
                       maxwrk = max( maxwrk, bdspac )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_dlamch( 'P' )
           sfmin = stdlib${ii}$_dlamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
              call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (workspace: need bdspac)
              call stdlib${ii}$_dbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_dlacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_dgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_dlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_dgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb)
              call stdlib${ii}$_dorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of l in work(il) and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need m*m+m+bdspac)
              call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,&
                         ldb, work( iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = ie
              ! multiply b by right singular vectors of l in work(il)
              ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_dlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_dgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,&
                               zero, work( iwork ), m )
                    call stdlib${ii}$_dlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_dgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( &
                           iwork ), 1_${ik}$ )
                 call stdlib${ii}$_dcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (workspace: need 4*m, prefer 3*m+m*nb)
              call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need bdspac)
              call stdlib${ii}$_dbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_dlacpy( 'F', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_dgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_dlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_dgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_dgelss

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info )
     !! DGELSS: computes the minimum norm solution to a real linear least
     !! squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${rk}$), intent(in) :: rcond
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_qgeqrf, lwork_qormqr, lwork_qgebrd, lwork_qormbr, lwork_qorgbr, &
                     lwork_qormlq, lwork_qgelqf
           real(${rk}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           real(${rk}$) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_${ri}$geqrf
                    call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_qgeqrf=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_${ri}$ormqr
                    call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_qormqr=dum(1_${ik}$)
                    mm = n
                    maxwrk = max( maxwrk, n + lwork_qgeqrf )
                    maxwrk = max( maxwrk, n + lwork_qormqr )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*n )
                    ! compute space needed for stdlib${ii}$_${ri}$gebrd
                    call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info &
                              )
                    lwork_qgebrd=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_${ri}$ormbr
                    call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_qormbr=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_${ri}$orgbr
                    call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_qorgbr=dum(1_${ik}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qgebrd )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qormbr )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr )
                    maxwrk = max( maxwrk, bdspac )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac )
                    maxwrk = max( minwrk, maxwrk )
                 end if
                 if( n>m ) then
                    ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*m )
                    minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_${ri}$gelqf
                       call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_qgelqf=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$gebrd
                       call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_qgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$ormbr
                       call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_qormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$orgbr
                       call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_qorgbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$ormlq
                       call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_qormlq=dum(1_${ik}$)
                       ! compute total workspace needed
                       maxwrk = m + lwork_qgelqf
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qgebrd )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qormbr )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qorgbr )
                       maxwrk = max( maxwrk, m*m + m + bdspac )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_qormlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_${ri}$gebrd
                       call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_qgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$ormbr
                       call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_qormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$orgbr
                       call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_qorgbr=dum(1_${ik}$)
                       maxwrk = 3_${ik}$*m + lwork_qgebrd
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qormbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr )
                       maxwrk = max( maxwrk, bdspac )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           sfmin = stdlib${ii}$_${ri}$lamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_${ri}$labad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_${ri}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
              call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (workspace: need bdspac)
              call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_${ri}$lacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_${ri}$lacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_${ri}$gemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb)
              call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of l in work(il) and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need m*m+m+bdspac)
              call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,&
                         ldb, work( iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = ie
              ! multiply b by right singular vectors of l in work(il)
              ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_${ri}$lacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,&
                               zero, work( iwork ), m )
                    call stdlib${ii}$_${ri}$lacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_${ri}$gemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( &
                           iwork ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (workspace: need 4*m, prefer 3*m+m*nb)
              call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need bdspac)
              call stdlib${ii}$_${ri}$bdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_${ri}$lacpy( 'F', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_${ri}$lacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_${ri}$gemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_${ri}$gelss

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! CGELSS computes the minimum norm solution to a complex linear
     !! least squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           real(sp), intent(out) :: rwork(*), s(*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_cgeqrf, lwork_cunmqr, lwork_cgebrd, lwork_cunmbr, lwork_cungbr, &
                     lwork_cunmlq, lwork_cgelqf
           real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           complex(sp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! cworkspace refers to complex workspace, and rworkspace refers
             ! to real workspace. nb refers to the optimal block size for the
             ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_cgeqrf
                    call stdlib${ii}$_cgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_cgeqrf = real( dum(1_${ik}$),KIND=sp)
                    ! compute space needed for stdlib${ii}$_cunmqr
                    call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_cunmqr = real( dum(1_${ik}$),KIND=sp)
                    mm = n
                    maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC',m, nrhs, n, -&
                              1_${ik}$ ) )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute space needed for stdlib${ii}$_cgebrd
                    call stdlib${ii}$_cgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                              
                    lwork_cgebrd = real( dum(1_${ik}$),KIND=sp)
                    ! compute space needed for stdlib${ii}$_cunmbr
                    call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_cunmbr = real( dum(1_${ik}$),KIND=sp)
                    ! compute space needed for stdlib${ii}$_cungbr
                    call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_cungbr = real( dum(1_${ik}$),KIND=sp)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cgebrd )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = 2_${ik}$*n + max( nrhs, m )
                 end if
                 if( n>m ) then
                    minwrk = 2_${ik}$*m + max( nrhs, n )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_cgelqf
                       call stdlib${ii}$_cgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_cgelqf = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cgebrd
                       call stdlib${ii}$_cgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_cgebrd = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cunmbr
                       call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_cunmbr = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cungbr
                       call stdlib${ii}$_cungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_cungbr = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cunmlq
                       call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_cunmlq = real( dum(1_${ik}$),KIND=sp)
                       ! compute total workspace needed
                       maxwrk = m + lwork_cgelqf
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cgebrd )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cunmbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cungbr )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_cunmlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_cgebrd
                       call stdlib${ii}$_cgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_cgebrd = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cunmbr
                       call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_cunmbr = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cungbr
                       call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_cungbr = real( dum(1_${ik}$),KIND=sp)
                       maxwrk = 2_${ik}$*m + lwork_cgebrd
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr )
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_slamch( 'P' )
           sfmin = stdlib${ii}$_slamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (cworkspace: need 2*n, prefer n+n*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (cworkspace: need n+nrhs, prefer n+nrhs*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
              end if
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_cbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_cgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_clacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_cgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then
              ! underdetermined case, m much less than n
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (cworkspace: need 2*m, prefer m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              ie = 1_${ik}$
              itauq = il + ldwork*m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
              ! (rworkspace: need m)
              call stdlib${ii}$_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration, computing right singular
              ! vectors of l in work(il) and multiplying b by transpose of
              ! left singular vectors
              ! (cworkspace: need m*m)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, &
                        b, ldb, rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = il + m*ldwork
              ! multiply b by right singular vectors of l in work(il)
              ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_clacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_cgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), &
                              ldb, czero, work( iwork ), m )
                    call stdlib${ii}$_clacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_cgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(&
                            iwork ), 1_${ik}$ )
                 call stdlib${ii}$_ccopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (cworkspace: need m+nrhs, prefer m+nhrs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (cworkspace: need 3*m, prefer 2*m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_cbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_cgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_clacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_cgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_cgelss

     module subroutine stdlib${ii}$_zgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! ZGELSS computes the minimum norm solution to a complex linear
     !! least squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           real(dp), intent(out) :: rwork(*), s(*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_zgeqrf, lwork_zunmqr, lwork_zgebrd, lwork_zunmbr, lwork_zungbr, &
                     lwork_zunmlq, lwork_zgelqf
           real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           complex(dp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! cworkspace refers to complex workspace, and rworkspace refers
             ! to real workspace. nb refers to the optimal block size for the
             ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_zgeqrf
                    call stdlib${ii}$_zgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_zgeqrf = real( dum(1_${ik}$),KIND=dp)
                    ! compute space needed for stdlib${ii}$_zunmqr
                    call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_zunmqr = real( dum(1_${ik}$),KIND=dp)
                    mm = n
                    maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC',m, nrhs, n, -&
                              1_${ik}$ ) )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute space needed for stdlib${ii}$_zgebrd
                    call stdlib${ii}$_zgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                              
                    lwork_zgebrd = real( dum(1_${ik}$),KIND=dp)
                    ! compute space needed for stdlib${ii}$_zunmbr
                    call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_zunmbr = real( dum(1_${ik}$),KIND=dp)
                    ! compute space needed for stdlib${ii}$_zungbr
                    call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_zungbr = real( dum(1_${ik}$),KIND=dp)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zgebrd )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = 2_${ik}$*n + max( nrhs, m )
                 end if
                 if( n>m ) then
                    minwrk = 2_${ik}$*m + max( nrhs, n )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_zgelqf
                       call stdlib${ii}$_zgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_zgelqf = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zgebrd
                       call stdlib${ii}$_zgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_zgebrd = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zunmbr
                       call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_zunmbr = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zungbr
                       call stdlib${ii}$_zungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_zungbr = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zunmlq
                       call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_zunmlq = real( dum(1_${ik}$),KIND=dp)
                       ! compute total workspace needed
                       maxwrk = m + lwork_zgelqf
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zgebrd )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zunmbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zungbr )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_zunmlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_zgebrd
                       call stdlib${ii}$_zgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_zgebrd = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zunmbr
                       call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_zunmbr = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zungbr
                       call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_zungbr = real( dum(1_${ik}$),KIND=dp)
                       maxwrk = 2_${ik}$*m + lwork_zgebrd
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr )
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_dlamch( 'P' )
           sfmin = stdlib${ii}$_dlamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (cworkspace: need 2*n, prefer n+n*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (cworkspace: need n+nrhs, prefer n+nrhs*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
              end if
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_zbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_zgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_zlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_zgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then
              ! underdetermined case, m much less than n
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (cworkspace: need 2*m, prefer m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              ie = 1_${ik}$
              itauq = il + ldwork*m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
              ! (rworkspace: need m)
              call stdlib${ii}$_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration, computing right singular
              ! vectors of l in work(il) and multiplying b by transpose of
              ! left singular vectors
              ! (cworkspace: need m*m)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, &
                        b, ldb, rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = il + m*ldwork
              ! multiply b by right singular vectors of l in work(il)
              ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_zlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_zgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), &
                              ldb, czero, work( iwork ), m )
                    call stdlib${ii}$_zlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_zgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(&
                            iwork ), 1_${ik}$ )
                 call stdlib${ii}$_zcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (cworkspace: need m+nrhs, prefer m+nhrs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (cworkspace: need 3*m, prefer 2*m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_zbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_zgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_zlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_zgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_zgelss

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! ZGELSS: computes the minimum norm solution to a complex linear
     !! least squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${ck}$), intent(in) :: rcond
           ! Array Arguments 
           real(${ck}$), intent(out) :: rwork(*), s(*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_wgeqrf, lwork_wunmqr, lwork_wgebrd, lwork_wunmbr, lwork_wungbr, &
                     lwork_wunmlq, lwork_wgelqf
           real(${ck}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           complex(${ck}$) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! cworkspace refers to complex workspace, and rworkspace refers
             ! to real workspace. nb refers to the optimal block size for the
             ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_${ci}$geqrf
                    call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_wgeqrf = real( dum(1_${ik}$),KIND=${ck}$)
                    ! compute space needed for stdlib${ii}$_${ci}$unmqr
                    call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_wunmqr = real( dum(1_${ik}$),KIND=${ck}$)
                    mm = n
                    maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC',m, nrhs, n, -&
                              1_${ik}$ ) )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute space needed for stdlib${ii}$_${ci}$gebrd
                    call stdlib${ii}$_${ci}$gebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                              
                    lwork_wgebrd = real( dum(1_${ik}$),KIND=${ck}$)
                    ! compute space needed for stdlib${ii}$_${ci}$unmbr
                    call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_wunmbr = real( dum(1_${ik}$),KIND=${ck}$)
                    ! compute space needed for stdlib${ii}$_${ci}$ungbr
                    call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_wungbr = real( dum(1_${ik}$),KIND=${ck}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wgebrd )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = 2_${ik}$*n + max( nrhs, m )
                 end if
                 if( n>m ) then
                    minwrk = 2_${ik}$*m + max( nrhs, n )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_${ci}$gelqf
                       call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_wgelqf = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute space needed for stdlib${ii}$_${ci}$gebrd
                       call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_wgebrd = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute space needed for stdlib${ii}$_${ci}$unmbr
                       call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_wunmbr = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute space needed for stdlib${ii}$_${ci}$ungbr
                       call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_wungbr = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute space needed for stdlib${ii}$_${ci}$unmlq
                       call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_wunmlq = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute total workspace needed
                       maxwrk = m + lwork_wgelqf
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_wgebrd )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_wunmbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_wungbr )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_wunmlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_${ci}$gebrd
                       call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_wgebrd = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute space needed for stdlib${ii}$_${ci}$unmbr
                       call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_wunmbr = real( dum(1_${ik}$),KIND=${ck}$)
                       ! compute space needed for stdlib${ii}$_${ci}$ungbr
                       call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_wungbr = real( dum(1_${ik}$),KIND=${ck}$)
                       maxwrk = 2_${ik}$*m + lwork_wgebrd
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr )
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
           sfmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_${c2ri(ci)}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_${ci}$lange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (cworkspace: need 2*n, prefer n+n*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (cworkspace: need n+nrhs, prefer n+nrhs*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
              end if
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_${ci}$gebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ci}$drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ci}$laset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_${ci}$lacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_${ci}$lacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_${ci}$gemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_${ci}$copy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then
              ! underdetermined case, m much less than n
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (cworkspace: need 2*m, prefer m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              ie = 1_${ik}$
              itauq = il + ldwork*m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
              ! (rworkspace: need m)
              call stdlib${ii}$_${ci}$gebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration, computing right singular
              ! vectors of l in work(il) and multiplying b by transpose of
              ! left singular vectors
              ! (cworkspace: need m*m)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, &
                        b, ldb, rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ci}$drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ci}$laset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = il + m*ldwork
              ! multiply b by right singular vectors of l in work(il)
              ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_${ci}$lacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ci}$gemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), &
                              ldb, czero, work( iwork ), m )
                    call stdlib${ii}$_${ci}$lacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_${ci}$gemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(&
                            iwork ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$copy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_${ci}$laset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (cworkspace: need m+nrhs, prefer m+nhrs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (cworkspace: need 3*m, prefer 2*m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_${ci}$bdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ci}$drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ci}$laset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_${ci}$lacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_${ci}$lacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_${ci}$gemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_${ci}$copy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_${ci}$gelss

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info )
     !! SGELSY computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize || A * X - B ||
     !! using a complete orthogonal factorization of A.  A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The routine first computes a QR factorization with column pivoting:
     !! A * P = Q * [ R11 R12 ]
     !! [  0  R22 ]
     !! with R11 defined as the largest leading submatrix whose estimated
     !! condition number is less than 1/RCOND.  The order of R11, RANK,
     !! is the effective rank of A.
     !! Then, R22 is considered to be negligible, and R12 is annihilated
     !! by orthogonal transformations from the right, arriving at the
     !! complete orthogonal factorization:
     !! A * P = Q * [ T11 0 ] * Z
     !! [  0  0 ]
     !! The minimum-norm solution is then
     !! X = P * Z**T [ inv(T11)*Q1**T*B ]
     !! [        0         ]
     !! where Q1 consists of the first RANK columns of Q.
     !! This routine is basically identical to the original xGELSX except
     !! three differences:
     !! o The call to the subroutine xGEQPF has been substituted by the
     !! the call to the subroutine xGEQP3. This subroutine is a Blas-3
     !! version of the QR factorization with column pivoting.
     !! o Matrix B (the right hand side) is updated with Blas-3.
     !! o The permutation of matrix B (the right hand side) is faster and
     !! more simple.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: imax = 1_${ik}$
           integer(${ik}$), parameter :: imin = 2_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, &
                     nb3, nb4
           real(sp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, &
                     wsize
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m, n )
           ismin = mn + 1_${ik}$
           ismax = 2_${ik}$*mn + 1_${ik}$
           ! test the input arguments.
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -7_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ ) then
              if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then
                 lwkmin = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', ' ', m, n, nrhs, -1_${ik}$ )
                 nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMRQ', ' ', m, n, nrhs, -1_${ik}$ )
                 nb = max( nb1, nb2, nb3, nb4 )
                 lwkmin = mn + max( 2_${ik}$*mn, n + 1_${ik}$, mn + nrhs )
                 lwkopt = max( lwkmin,mn + 2_${ik}$*n + nb*( n + 1_${ik}$ ), 2_${ik}$*mn + nb*nrhs )
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<lwkmin .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGELSY', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a, b if max entries outside range [smlnum,bignum]
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              rank = 0_${ik}$
              go to 70
           end if
           bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! compute qr factorization with column pivoting of a:
              ! a * p = q * r
           call stdlib${ii}$_sgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, info )
                     
           wsize = mn + work( mn+1 )
           ! workspace: mn+2*n+nb*(n+1).
           ! details of householder rotations stored in work(1:mn).
           ! determine rank using incremental condition estimation
           work( ismin ) = one
           work( ismax ) = one
           smax = abs( a( 1_${ik}$, 1_${ik}$ ) )
           smin = smax
           if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then
              rank = 0_${ik}$
              call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              go to 70
           else
              rank = 1_${ik}$
           end if
           10 continue
           if( rank<mn ) then
              i = rank + 1_${ik}$
              call stdlib${ii}$_slaic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, &
                        s1, c1 )
              call stdlib${ii}$_slaic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, &
                        s2, c2 )
              if( smaxpr*rcond<=sminpr ) then
                 do i = 1, rank
                    work( ismin+i-1 ) = s1*work( ismin+i-1 )
                    work( ismax+i-1 ) = s2*work( ismax+i-1 )
                 end do
                 work( ismin+rank ) = c1
                 work( ismax+rank ) = c2
                 smin = sminpr
                 smax = smaxpr
                 rank = rank + 1_${ik}$
                 go to 10
              end if
           end if
           ! workspace: 3*mn.
           ! logically partition r = [ r11 r12 ]
                                   ! [  0  r22 ]
           ! where r11 = r(1:rank,1:rank)
           ! [r11,r12] = [ t11, 0 ] * y
           if( rank<n )call stdlib${ii}$_stzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-&
                     2_${ik}$*mn, info )
           ! workspace: 2*mn.
           ! details of householder rotations stored in work(mn+1:2*mn)
           ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
           call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', m, nrhs, mn, a, lda, work( 1_${ik}$ ),b, ldb, work( &
                     2_${ik}$*mn+1 ), lwork-2*mn, info )
           wsize = max( wsize, 2_${ik}$*mn+work( 2_${ik}$*mn+1 ) )
           ! workspace: 2*mn+nb*nrhs.
           ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs)
           call stdlib${ii}$_strsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, one, a, lda,&
                      b, ldb )
           do j = 1, nrhs
              do i = rank + 1, n
                 b( i, j ) = zero
              end do
           end do
           ! b(1:n,1:nrhs) := y**t * b(1:n,1:nrhs)
           if( rank<n ) then
              call stdlib${ii}$_sormrz( 'LEFT', 'TRANSPOSE', n, nrhs, rank, n-rank, a,lda, work( mn+1 ),&
                         b, ldb, work( 2_${ik}$*mn+1 ),lwork-2*mn, info )
           end if
           ! workspace: 2*mn+nrhs.
           ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs)
           do j = 1, nrhs
              do i = 1, n
                 work( jpvt( i ) ) = b( i, j )
              end do
              call stdlib${ii}$_scopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! workspace: n.
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_sgelsy

     module subroutine stdlib${ii}$_dgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info )
     !! DGELSY computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize || A * X - B ||
     !! using a complete orthogonal factorization of A.  A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The routine first computes a QR factorization with column pivoting:
     !! A * P = Q * [ R11 R12 ]
     !! [  0  R22 ]
     !! with R11 defined as the largest leading submatrix whose estimated
     !! condition number is less than 1/RCOND.  The order of R11, RANK,
     !! is the effective rank of A.
     !! Then, R22 is considered to be negligible, and R12 is annihilated
     !! by orthogonal transformations from the right, arriving at the
     !! complete orthogonal factorization:
     !! A * P = Q * [ T11 0 ] * Z
     !! [  0  0 ]
     !! The minimum-norm solution is then
     !! X = P * Z**T [ inv(T11)*Q1**T*B ]
     !! [        0         ]
     !! where Q1 consists of the first RANK columns of Q.
     !! This routine is basically identical to the original xGELSX except
     !! three differences:
     !! o The call to the subroutine xGEQPF has been substituted by the
     !! the call to the subroutine xGEQP3. This subroutine is a Blas-3
     !! version of the QR factorization with column pivoting.
     !! o Matrix B (the right hand side) is updated with Blas-3.
     !! o The permutation of matrix B (the right hand side) is faster and
     !! more simple.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: imax = 1_${ik}$
           integer(${ik}$), parameter :: imin = 2_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, &
                     nb3, nb4
           real(dp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, &
                     wsize
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m, n )
           ismin = mn + 1_${ik}$
           ismax = 2_${ik}$*mn + 1_${ik}$
           ! test the input arguments.
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -7_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ ) then
              if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then
                 lwkmin = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', ' ', m, n, nrhs, -1_${ik}$ )
                 nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', ' ', m, n, nrhs, -1_${ik}$ )
                 nb = max( nb1, nb2, nb3, nb4 )
                 lwkmin = mn + max( 2_${ik}$*mn, n + 1_${ik}$, mn + nrhs )
                 lwkopt = max( lwkmin,mn + 2_${ik}$*n + nb*( n + 1_${ik}$ ), 2_${ik}$*mn + nb*nrhs )
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<lwkmin .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSY', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a, b if max entries outside range [smlnum,bignum]
           anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              rank = 0_${ik}$
              go to 70
           end if
           bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! compute qr factorization with column pivoting of a:
              ! a * p = q * r
           call stdlib${ii}$_dgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, info )
                     
           wsize = mn + work( mn+1 )
           ! workspace: mn+2*n+nb*(n+1).
           ! details of householder rotations stored in work(1:mn).
           ! determine rank using incremental condition estimation
           work( ismin ) = one
           work( ismax ) = one
           smax = abs( a( 1_${ik}$, 1_${ik}$ ) )
           smin = smax
           if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then
              rank = 0_${ik}$
              call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              go to 70
           else
              rank = 1_${ik}$
           end if
           10 continue
           if( rank<mn ) then
              i = rank + 1_${ik}$
              call stdlib${ii}$_dlaic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, &
                        s1, c1 )
              call stdlib${ii}$_dlaic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, &
                        s2, c2 )
              if( smaxpr*rcond<=sminpr ) then
                 do i = 1, rank
                    work( ismin+i-1 ) = s1*work( ismin+i-1 )
                    work( ismax+i-1 ) = s2*work( ismax+i-1 )
                 end do
                 work( ismin+rank ) = c1
                 work( ismax+rank ) = c2
                 smin = sminpr
                 smax = smaxpr
                 rank = rank + 1_${ik}$
                 go to 10
              end if
           end if
           ! workspace: 3*mn.
           ! logically partition r = [ r11 r12 ]
                                   ! [  0  r22 ]
           ! where r11 = r(1:rank,1:rank)
           ! [r11,r12] = [ t11, 0 ] * y
           if( rank<n )call stdlib${ii}$_dtzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-&
                     2_${ik}$*mn, info )
           ! workspace: 2*mn.
           ! details of householder rotations stored in work(mn+1:2*mn)
           ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
           call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', m, nrhs, mn, a, lda, work( 1_${ik}$ ),b, ldb, work( &
                     2_${ik}$*mn+1 ), lwork-2*mn, info )
           wsize = max( wsize, 2_${ik}$*mn+work( 2_${ik}$*mn+1 ) )
           ! workspace: 2*mn+nb*nrhs.
           ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs)
           call stdlib${ii}$_dtrsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, one, a, lda,&
                      b, ldb )
           do j = 1, nrhs
              do i = rank + 1, n
                 b( i, j ) = zero
              end do
           end do
           ! b(1:n,1:nrhs) := y**t * b(1:n,1:nrhs)
           if( rank<n ) then
              call stdlib${ii}$_dormrz( 'LEFT', 'TRANSPOSE', n, nrhs, rank, n-rank, a,lda, work( mn+1 ),&
                         b, ldb, work( 2_${ik}$*mn+1 ),lwork-2*mn, info )
           end if
           ! workspace: 2*mn+nrhs.
           ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs)
           do j = 1, nrhs
              do i = 1, n
                 work( jpvt( i ) ) = b( i, j )
              end do
              call stdlib${ii}$_dcopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! workspace: n.
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_dgelsy

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$gelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info )
     !! DGELSY: computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize || A * X - B ||
     !! using a complete orthogonal factorization of A.  A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The routine first computes a QR factorization with column pivoting:
     !! A * P = Q * [ R11 R12 ]
     !! [  0  R22 ]
     !! with R11 defined as the largest leading submatrix whose estimated
     !! condition number is less than 1/RCOND.  The order of R11, RANK,
     !! is the effective rank of A.
     !! Then, R22 is considered to be negligible, and R12 is annihilated
     !! by orthogonal transformations from the right, arriving at the
     !! complete orthogonal factorization:
     !! A * P = Q * [ T11 0 ] * Z
     !! [  0  0 ]
     !! The minimum-norm solution is then
     !! X = P * Z**T [ inv(T11)*Q1**T*B ]
     !! [        0         ]
     !! where Q1 consists of the first RANK columns of Q.
     !! This routine is basically identical to the original xGELSX except
     !! three differences:
     !! o The call to the subroutine xGEQPF has been substituted by the
     !! the call to the subroutine xGEQP3. This subroutine is a Blas-3
     !! version of the QR factorization with column pivoting.
     !! o Matrix B (the right hand side) is updated with Blas-3.
     !! o The permutation of matrix B (the right hand side) is faster and
     !! more simple.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${rk}$), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: imax = 1_${ik}$
           integer(${ik}$), parameter :: imin = 2_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, &
                     nb3, nb4
           real(${rk}$) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, &
                     wsize
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m, n )
           ismin = mn + 1_${ik}$
           ismax = 2_${ik}$*mn + 1_${ik}$
           ! test the input arguments.
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -7_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ ) then
              if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then
                 lwkmin = 1_${ik}$
                 lwkopt = 1_${ik}$
              else
                 nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', ' ', m, n, nrhs, -1_${ik}$ )
                 nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', ' ', m, n, nrhs, -1_${ik}$ )
                 nb = max( nb1, nb2, nb3, nb4 )
                 lwkmin = mn + max( 2_${ik}$*mn, n + 1_${ik}$, mn + nrhs )
                 lwkopt = max( lwkmin,mn + 2_${ik}$*n + nb*( n + 1_${ik}$ ), 2_${ik}$*mn + nb*nrhs )
              end if
              work( 1_${ik}$ ) = lwkopt
              if( lwork<lwkmin .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSY', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / stdlib${ii}$_${ri}$lamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_${ri}$labad( smlnum, bignum )
           ! scale a, b if max entries outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              rank = 0_${ik}$
              go to 70
           end if
           bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! compute qr factorization with column pivoting of a:
              ! a * p = q * r
           call stdlib${ii}$_${ri}$geqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, info )
                     
           wsize = mn + work( mn+1 )
           ! workspace: mn+2*n+nb*(n+1).
           ! details of householder rotations stored in work(1:mn).
           ! determine rank using incremental condition estimation
           work( ismin ) = one
           work( ismax ) = one
           smax = abs( a( 1_${ik}$, 1_${ik}$ ) )
           smin = smax
           if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then
              rank = 0_${ik}$
              call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              go to 70
           else
              rank = 1_${ik}$
           end if
           10 continue
           if( rank<mn ) then
              i = rank + 1_${ik}$
              call stdlib${ii}$_${ri}$laic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, &
                        s1, c1 )
              call stdlib${ii}$_${ri}$laic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, &
                        s2, c2 )
              if( smaxpr*rcond<=sminpr ) then
                 do i = 1, rank
                    work( ismin+i-1 ) = s1*work( ismin+i-1 )
                    work( ismax+i-1 ) = s2*work( ismax+i-1 )
                 end do
                 work( ismin+rank ) = c1
                 work( ismax+rank ) = c2
                 smin = sminpr
                 smax = smaxpr
                 rank = rank + 1_${ik}$
                 go to 10
              end if
           end if
           ! workspace: 3*mn.
           ! logically partition r = [ r11 r12 ]
                                   ! [  0  r22 ]
           ! where r11 = r(1:rank,1:rank)
           ! [r11,r12] = [ t11, 0 ] * y
           if( rank<n )call stdlib${ii}$_${ri}$tzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-&
                     2_${ik}$*mn, info )
           ! workspace: 2*mn.
           ! details of householder rotations stored in work(mn+1:2*mn)
           ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
           call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', m, nrhs, mn, a, lda, work( 1_${ik}$ ),b, ldb, work( &
                     2_${ik}$*mn+1 ), lwork-2*mn, info )
           wsize = max( wsize, 2_${ik}$*mn+work( 2_${ik}$*mn+1 ) )
           ! workspace: 2*mn+nb*nrhs.
           ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs)
           call stdlib${ii}$_${ri}$trsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, one, a, lda,&
                      b, ldb )
           do j = 1, nrhs
              do i = rank + 1, n
                 b( i, j ) = zero
              end do
           end do
           ! b(1:n,1:nrhs) := y**t * b(1:n,1:nrhs)
           if( rank<n ) then
              call stdlib${ii}$_${ri}$ormrz( 'LEFT', 'TRANSPOSE', n, nrhs, rank, n-rank, a,lda, work( mn+1 ),&
                         b, ldb, work( 2_${ik}$*mn+1 ),lwork-2*mn, info )
           end if
           ! workspace: 2*mn+nrhs.
           ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs)
           do j = 1, nrhs
              do i = 1, n
                 work( jpvt( i ) ) = b( i, j )
              end do
              call stdlib${ii}$_${ri}$copy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! workspace: n.
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = lwkopt
           return
     end subroutine stdlib${ii}$_${ri}$gelsy

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, &
     !! CGELSY computes the minimum-norm solution to a complex linear least
     !! squares problem:
     !! minimize || A * X - B ||
     !! using a complete orthogonal factorization of A.  A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The routine first computes a QR factorization with column pivoting:
     !! A * P = Q * [ R11 R12 ]
     !! [  0  R22 ]
     !! with R11 defined as the largest leading submatrix whose estimated
     !! condition number is less than 1/RCOND.  The order of R11, RANK,
     !! is the effective rank of A.
     !! Then, R22 is considered to be negligible, and R12 is annihilated
     !! by unitary transformations from the right, arriving at the
     !! complete orthogonal factorization:
     !! A * P = Q * [ T11 0 ] * Z
     !! [  0  0 ]
     !! The minimum-norm solution is then
     !! X = P * Z**H [ inv(T11)*Q1**H*B ]
     !! [        0         ]
     !! where Q1 consists of the first RANK columns of Q.
     !! This routine is basically identical to the original xGELSX except
     !! three differences:
     !! o The permutation of matrix B (the right hand side) is faster and
     !! more simple.
     !! o The call to the subroutine xGEQPF has been substituted by the
     !! the call to the subroutine xGEQP3. This subroutine is a Blas-3
     !! version of the QR factorization with column pivoting.
     !! o Matrix B (the right hand side) is updated with Blas-3.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(sp), intent(out) :: rwork(*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: imax = 1_${ik}$
           integer(${ik}$), parameter :: imin = 2_${ik}$
           
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, &
                     nb4
           real(sp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize
           complex(sp) :: c1, c2, s1, s2
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m, n )
           ismin = mn + 1_${ik}$
           ismax = 2_${ik}$*mn + 1_${ik}$
           ! test the input arguments.
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', ' ', m, n, nrhs, -1_${ik}$ )
           nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMRQ', ' ', m, n, nrhs, -1_${ik}$ )
           nb = max( nb1, nb2, nb3, nb4 )
           lwkopt = max( 1_${ik}$, mn+2*n+nb*(n+1), 2_${ik}$*mn+nb*nrhs )
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -7_${ik}$
           else if( lwork<( mn+max( 2_${ik}$*mn, n+1, mn+nrhs ) ) .and..not.lquery ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGELSY', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a, b if max entries outside range [smlnum,bignum]
           anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              rank = 0_${ik}$
              go to 70
           end if
           bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! compute qr factorization with column pivoting of a:
              ! a * p = q * r
           call stdlib${ii}$_cgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, rwork, info )
                     
           wsize = mn + real( work( mn+1 ),KIND=sp)
           ! complex workspace: mn+nb*(n+1). real workspace 2*n.
           ! details of householder rotations stored in work(1:mn).
           ! determine rank using incremental condition estimation
           work( ismin ) = cone
           work( ismax ) = cone
           smax = abs( a( 1_${ik}$, 1_${ik}$ ) )
           smin = smax
           if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then
              rank = 0_${ik}$
              call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              go to 70
           else
              rank = 1_${ik}$
           end if
           10 continue
           if( rank<mn ) then
              i = rank + 1_${ik}$
              call stdlib${ii}$_claic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, &
                        s1, c1 )
              call stdlib${ii}$_claic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, &
                        s2, c2 )
              if( smaxpr*rcond<=sminpr ) then
                 do i = 1, rank
                    work( ismin+i-1 ) = s1*work( ismin+i-1 )
                    work( ismax+i-1 ) = s2*work( ismax+i-1 )
                 end do
                 work( ismin+rank ) = c1
                 work( ismax+rank ) = c2
                 smin = sminpr
                 smax = smaxpr
                 rank = rank + 1_${ik}$
                 go to 10
              end if
           end if
           ! complex workspace: 3*mn.
           ! logically partition r = [ r11 r12 ]
                                   ! [  0  r22 ]
           ! where r11 = r(1:rank,1:rank)
           ! [r11,r12] = [ t11, 0 ] * y
           if( rank<n )call stdlib${ii}$_ctzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-&
                     2_${ik}$*mn, info )
           ! complex workspace: 2*mn.
           ! details of householder rotations stored in work(mn+1:2*mn)
           ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs)
           call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, mn, a, lda,work( 1_${ik}$ ), b, &
                     ldb, work( 2_${ik}$*mn+1 ), lwork-2*mn, info )
           wsize = max( wsize, 2_${ik}$*mn+real( work( 2_${ik}$*mn+1 ),KIND=sp) )
           ! complex workspace: 2*mn+nb*nrhs.
           ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs)
           call stdlib${ii}$_ctrsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, cone, a, &
                     lda, b, ldb )
           do j = 1, nrhs
              do i = rank + 1, n
                 b( i, j ) = czero
              end do
           end do
           ! b(1:n,1:nrhs) := y**h * b(1:n,1:nrhs)
           if( rank<n ) then
              call stdlib${ii}$_cunmrz( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, rank,n-rank, a, lda, &
                        work( mn+1 ), b, ldb,work( 2_${ik}$*mn+1 ), lwork-2*mn, info )
           end if
           ! complex workspace: 2*mn+nrhs.
           ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs)
           do j = 1, nrhs
              do i = 1, n
                 work( jpvt( i ) ) = b( i, j )
              end do
              call stdlib${ii}$_ccopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! complex workspace: n.
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_clascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_clascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp)
           return
     end subroutine stdlib${ii}$_cgelsy

     module subroutine stdlib${ii}$_zgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, &
     !! ZGELSY computes the minimum-norm solution to a complex linear least
     !! squares problem:
     !! minimize || A * X - B ||
     !! using a complete orthogonal factorization of A.  A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The routine first computes a QR factorization with column pivoting:
     !! A * P = Q * [ R11 R12 ]
     !! [  0  R22 ]
     !! with R11 defined as the largest leading submatrix whose estimated
     !! condition number is less than 1/RCOND.  The order of R11, RANK,
     !! is the effective rank of A.
     !! Then, R22 is considered to be negligible, and R12 is annihilated
     !! by unitary transformations from the right, arriving at the
     !! complete orthogonal factorization:
     !! A * P = Q * [ T11 0 ] * Z
     !! [  0  0 ]
     !! The minimum-norm solution is then
     !! X = P * Z**H [ inv(T11)*Q1**H*B ]
     !! [        0         ]
     !! where Q1 consists of the first RANK columns of Q.
     !! This routine is basically identical to the original xGELSX except
     !! three differences:
     !! o The permutation of matrix B (the right hand side) is faster and
     !! more simple.
     !! o The call to the subroutine xGEQPF has been substituted by the
     !! the call to the subroutine xGEQP3. This subroutine is a Blas-3
     !! version of the QR factorization with column pivoting.
     !! o Matrix B (the right hand side) is updated with Blas-3.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(dp), intent(out) :: rwork(*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: imax = 1_${ik}$
           integer(${ik}$), parameter :: imin = 2_${ik}$
           
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, &
                     nb4
           real(dp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize
           complex(dp) :: c1, c2, s1, s2
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m, n )
           ismin = mn + 1_${ik}$
           ismax = 2_${ik}$*mn + 1_${ik}$
           ! test the input arguments.
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', ' ', m, n, nrhs, -1_${ik}$ )
           nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', ' ', m, n, nrhs, -1_${ik}$ )
           nb = max( nb1, nb2, nb3, nb4 )
           lwkopt = max( 1_${ik}$, mn+2*n+nb*( n+1 ), 2_${ik}$*mn+nb*nrhs )
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -7_${ik}$
           else if( lwork<( mn+max( 2_${ik}$*mn, n+1, mn+nrhs ) ) .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSY', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a, b if max entries outside range [smlnum,bignum]
           anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              rank = 0_${ik}$
              go to 70
           end if
           bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! compute qr factorization with column pivoting of a:
              ! a * p = q * r
           call stdlib${ii}$_zgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, rwork, info )
                     
           wsize = mn + real( work( mn+1 ),KIND=dp)
           ! complex workspace: mn+nb*(n+1). real workspace 2*n.
           ! details of householder rotations stored in work(1:mn).
           ! determine rank using incremental condition estimation
           work( ismin ) = cone
           work( ismax ) = cone
           smax = abs( a( 1_${ik}$, 1_${ik}$ ) )
           smin = smax
           if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then
              rank = 0_${ik}$
              call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              go to 70
           else
              rank = 1_${ik}$
           end if
           10 continue
           if( rank<mn ) then
              i = rank + 1_${ik}$
              call stdlib${ii}$_zlaic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, &
                        s1, c1 )
              call stdlib${ii}$_zlaic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, &
                        s2, c2 )
              if( smaxpr*rcond<=sminpr ) then
                 do i = 1, rank
                    work( ismin+i-1 ) = s1*work( ismin+i-1 )
                    work( ismax+i-1 ) = s2*work( ismax+i-1 )
                 end do
                 work( ismin+rank ) = c1
                 work( ismax+rank ) = c2
                 smin = sminpr
                 smax = smaxpr
                 rank = rank + 1_${ik}$
                 go to 10
              end if
           end if
           ! complex workspace: 3*mn.
           ! logically partition r = [ r11 r12 ]
                                   ! [  0  r22 ]
           ! where r11 = r(1:rank,1:rank)
           ! [r11,r12] = [ t11, 0 ] * y
           if( rank<n )call stdlib${ii}$_ztzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-&
                     2_${ik}$*mn, info )
           ! complex workspace: 2*mn.
           ! details of householder rotations stored in work(mn+1:2*mn)
           ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs)
           call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, mn, a, lda,work( 1_${ik}$ ), b, &
                     ldb, work( 2_${ik}$*mn+1 ), lwork-2*mn, info )
           wsize = max( wsize, 2_${ik}$*mn+real( work( 2_${ik}$*mn+1 ),KIND=dp) )
           ! complex workspace: 2*mn+nb*nrhs.
           ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs)
           call stdlib${ii}$_ztrsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, cone, a, &
                     lda, b, ldb )
           do j = 1, nrhs
              do i = rank + 1, n
                 b( i, j ) = czero
              end do
           end do
           ! b(1:n,1:nrhs) := y**h * b(1:n,1:nrhs)
           if( rank<n ) then
              call stdlib${ii}$_zunmrz( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, rank,n-rank, a, lda, &
                        work( mn+1 ), b, ldb,work( 2_${ik}$*mn+1 ), lwork-2*mn, info )
           end if
           ! complex workspace: 2*mn+nrhs.
           ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs)
           do j = 1, nrhs
              do i = 1, n
                 work( jpvt( i ) ) = b( i, j )
              end do
              call stdlib${ii}$_zcopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! complex workspace: n.
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_zlascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_zlascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp)
           return
     end subroutine stdlib${ii}$_zgelsy

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$gelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, &
     !! ZGELSY: computes the minimum-norm solution to a complex linear least
     !! squares problem:
     !! minimize || A * X - B ||
     !! using a complete orthogonal factorization of A.  A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The routine first computes a QR factorization with column pivoting:
     !! A * P = Q * [ R11 R12 ]
     !! [  0  R22 ]
     !! with R11 defined as the largest leading submatrix whose estimated
     !! condition number is less than 1/RCOND.  The order of R11, RANK,
     !! is the effective rank of A.
     !! Then, R22 is considered to be negligible, and R12 is annihilated
     !! by unitary transformations from the right, arriving at the
     !! complete orthogonal factorization:
     !! A * P = Q * [ T11 0 ] * Z
     !! [  0  0 ]
     !! The minimum-norm solution is then
     !! X = P * Z**H [ inv(T11)*Q1**H*B ]
     !! [        0         ]
     !! where Q1 consists of the first RANK columns of Q.
     !! This routine is basically identical to the original xGELSX except
     !! three differences:
     !! o The permutation of matrix B (the right hand side) is faster and
     !! more simple.
     !! o The call to the subroutine xGEQPF has been substituted by the
     !! the call to the subroutine xGEQP3. This subroutine is a Blas-3
     !! version of the QR factorization with column pivoting.
     !! o Matrix B (the right hand side) is updated with Blas-3.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${ck}$), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: jpvt(*)
           real(${ck}$), intent(out) :: rwork(*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: imax = 1_${ik}$
           integer(${ik}$), parameter :: imin = 2_${ik}$
           
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, &
                     nb4
           real(${ck}$) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize
           complex(${ck}$) :: c1, c2, s1, s2
           ! Intrinsic Functions 
           ! Executable Statements 
           mn = min( m, n )
           ismin = mn + 1_${ik}$
           ismax = 2_${ik}$*mn + 1_${ik}$
           ! test the input arguments.
           info = 0_${ik}$
           nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
           nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', ' ', m, n, nrhs, -1_${ik}$ )
           nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', ' ', m, n, nrhs, -1_${ik}$ )
           nb = max( nb1, nb2, nb3, nb4 )
           lwkopt = max( 1_${ik}$, mn+2*n+nb*( n+1 ), 2_${ik}$*mn+nb*nrhs )
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$)
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -7_${ik}$
           else if( lwork<( mn+max( 2_${ik}$*mn, n+1, mn+nrhs ) ) .and. .not.lquery ) then
              info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSY', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum )
           ! scale a, b if max entries outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              rank = 0_${ik}$
              go to 70
           end if
           bnrm = stdlib${ii}$_${ci}$lange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! compute qr factorization with column pivoting of a:
              ! a * p = q * r
           call stdlib${ii}$_${ci}$geqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, rwork, info )
                     
           wsize = mn + real( work( mn+1 ),KIND=${ck}$)
           ! complex workspace: mn+nb*(n+1). real workspace 2*n.
           ! details of householder rotations stored in work(1:mn).
           ! determine rank using incremental condition estimation
           work( ismin ) = cone
           work( ismax ) = cone
           smax = abs( a( 1_${ik}$, 1_${ik}$ ) )
           smin = smax
           if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then
              rank = 0_${ik}$
              call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              go to 70
           else
              rank = 1_${ik}$
           end if
           10 continue
           if( rank<mn ) then
              i = rank + 1_${ik}$
              call stdlib${ii}$_${ci}$laic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, &
                        s1, c1 )
              call stdlib${ii}$_${ci}$laic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, &
                        s2, c2 )
              if( smaxpr*rcond<=sminpr ) then
                 do i = 1, rank
                    work( ismin+i-1 ) = s1*work( ismin+i-1 )
                    work( ismax+i-1 ) = s2*work( ismax+i-1 )
                 end do
                 work( ismin+rank ) = c1
                 work( ismax+rank ) = c2
                 smin = sminpr
                 smax = smaxpr
                 rank = rank + 1_${ik}$
                 go to 10
              end if
           end if
           ! complex workspace: 3*mn.
           ! logically partition r = [ r11 r12 ]
                                   ! [  0  r22 ]
           ! where r11 = r(1:rank,1:rank)
           ! [r11,r12] = [ t11, 0 ] * y
           if( rank<n )call stdlib${ii}$_${ci}$tzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-&
                     2_${ik}$*mn, info )
           ! complex workspace: 2*mn.
           ! details of householder rotations stored in work(mn+1:2*mn)
           ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs)
           call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, mn, a, lda,work( 1_${ik}$ ), b, &
                     ldb, work( 2_${ik}$*mn+1 ), lwork-2*mn, info )
           wsize = max( wsize, 2_${ik}$*mn+real( work( 2_${ik}$*mn+1 ),KIND=${ck}$) )
           ! complex workspace: 2*mn+nb*nrhs.
           ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs)
           call stdlib${ii}$_${ci}$trsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, cone, a, &
                     lda, b, ldb )
           do j = 1, nrhs
              do i = rank + 1, n
                 b( i, j ) = czero
              end do
           end do
           ! b(1:n,1:nrhs) := y**h * b(1:n,1:nrhs)
           if( rank<n ) then
              call stdlib${ii}$_${ci}$unmrz( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, rank,n-rank, a, lda, &
                        work( mn+1 ), b, ldb,work( 2_${ik}$*mn+1 ), lwork-2*mn, info )
           end if
           ! complex workspace: 2*mn+nrhs.
           ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs)
           do j = 1, nrhs
              do i = 1, n
                 work( jpvt( i ) ) = b( i, j )
              end do
              call stdlib${ii}$_${ci}$copy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ )
           end do
           ! complex workspace: n.
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ci}$lascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ci}$lascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$gelsy

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info )
     !! SGELS solves overdetermined or underdetermined real linear systems
     !! involving an M-by-N matrix A, or its transpose, using a QR or LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
     !! an underdetermined system A**T * X = B.
     !! 4. If TRANS = 'T' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, tpsd
           integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize
           real(sp) :: anrm, bignum, bnrm, smlnum
           ! Local Arrays 
           real(sp) :: rwork(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, mn + max( mn, nrhs ) ) .and..not.lquery ) then
              info = -10_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ .or. info==-10_${ik}$ ) then
              tpsd = .true.
              if( stdlib_lsame( trans, 'N' ) )tpsd = .false.
              if( m>=n ) then
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', 'LN', m, nrhs, n,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', 'LT', m, nrhs, n,-1_${ik}$ ) )
                 end if
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', 'LT', n, nrhs, m,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) )
                 end if
              end if
              wsize = max( 1_${ik}$, mn + max( mn, nrhs )*nb )
              work( 1_${ik}$ ) = real( wsize,KIND=sp)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGELS ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              call stdlib${ii}$_slaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb )
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              go to 50
           end if
           brow = m
           if( tpsd )brow = n
           bnrm = stdlib${ii}$_slange( 'M', brow, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if( m>=n ) then
              ! compute qr factorization of a
              call stdlib${ii}$_sgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least n, optimally n*nb
              if( .not.tpsd ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb, &
                           work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
                 call stdlib${ii}$_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = n
              else
                 ! underdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_strtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_sormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_sgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least m, optimally m*nb.
              if( .not.tpsd ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_strtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_sormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb, &
                           work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_sormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_strtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( wsize,KIND=sp)
           return
     end subroutine stdlib${ii}$_sgels

     module subroutine stdlib${ii}$_dgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info )
     !! DGELS solves overdetermined or underdetermined real linear systems
     !! involving an M-by-N matrix A, or its transpose, using a QR or LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
     !! an underdetermined system A**T * X = B.
     !! 4. If TRANS = 'T' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, tpsd
           integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize
           real(dp) :: anrm, bignum, bnrm, smlnum
           ! Local Arrays 
           real(dp) :: rwork(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then
              info = -10_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ .or. info==-10_${ik}$ ) then
              tpsd = .true.
              if( stdlib_lsame( trans, 'N' ) )tpsd = .false.
              if( m>=n ) then
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LN', m, nrhs, n,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n,-1_${ik}$ ) )
                 end if
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) )
                 end if
              end if
              wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb )
              work( 1_${ik}$ ) = real( wsize,KIND=dp)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELS ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              call stdlib${ii}$_dlaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb )
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              go to 50
           end if
           brow = m
           if( tpsd )brow = n
           bnrm = stdlib${ii}$_dlange( 'M', brow, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if( m>=n ) then
              ! compute qr factorization of a
              call stdlib${ii}$_dgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least n, optimally n*nb
              if( .not.tpsd ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb, &
                           work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = n
              else
                 ! underdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_dormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_dgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least m, optimally m*nb.
              if( .not.tpsd ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_dormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb, &
                           work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_dormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( wsize,KIND=dp)
           return
     end subroutine stdlib${ii}$_dgels

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$gels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info )
     !! DGELS: solves overdetermined or underdetermined real linear systems
     !! involving an M-by-N matrix A, or its transpose, using a QR or LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
     !! an underdetermined system A**T * X = B.
     !! 4. If TRANS = 'T' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, tpsd
           integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize
           real(${rk}$) :: anrm, bignum, bnrm, smlnum
           ! Local Arrays 
           real(${rk}$) :: rwork(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then
              info = -10_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ .or. info==-10_${ik}$ ) then
              tpsd = .true.
              if( stdlib_lsame( trans, 'N' ) )tpsd = .false.
              if( m>=n ) then
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LN', m, nrhs, n,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n,-1_${ik}$ ) )
                 end if
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) )
                 end if
              end if
              wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb )
              work( 1_${ik}$ ) = real( wsize,KIND=${rk}$)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELS ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              call stdlib${ii}$_${ri}$laset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb )
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / stdlib${ii}$_${ri}$lamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_${ri}$labad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              go to 50
           end if
           brow = m
           if( tpsd )brow = n
           bnrm = stdlib${ii}$_${ri}$lange( 'M', brow, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if( m>=n ) then
              ! compute qr factorization of a
              call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least n, optimally n*nb
              if( .not.tpsd ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb, &
                           work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = n
              else
                 ! underdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least m, optimally m*nb.
              if( .not.tpsd ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$ormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb, &
                           work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$ormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( wsize,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$gels

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info )
     !! CGELS solves overdetermined or underdetermined complex linear systems
     !! involving an M-by-N matrix A, or its conjugate-transpose, using a QR
     !! or LQ factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
     !! an underdetermined system A**H * X = B.
     !! 4. If TRANS = 'C' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**H * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery, tpsd
           integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize
           real(sp) :: anrm, bignum, bnrm, smlnum
           ! Local Arrays 
           real(sp) :: rwork(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and..not.lquery ) then
              info = -10_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ .or. info==-10_${ik}$ ) then
              tpsd = .true.
              if( stdlib_lsame( trans, 'N' ) )tpsd = .false.
              if( m>=n ) then
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LN', m, nrhs, n,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC', m, nrhs, n,-1_${ik}$ ) )
                 end if
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', 'LC', n, nrhs, m,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) )
                 end if
              end if
              wsize = max( 1_${ik}$, mn + max( mn, nrhs )*nb )
              work( 1_${ik}$ ) = real( wsize,KIND=sp)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGELS ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              call stdlib${ii}$_claset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb )
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              go to 50
           end if
           brow = m
           if( tpsd )brow = n
           bnrm = stdlib${ii}$_clange( 'M', brow, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if( m>=n ) then
              ! compute qr factorization of a
              call stdlib${ii}$_cgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least n, optimally n*nb
              if( .not.tpsd ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs)
                 call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1_${ik}$ ), &
                           b, ldb, work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = n
              else
                 ! underdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,&
                            ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_cunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_cgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least m, optimally m*nb.
              if( .not.tpsd ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs)
                 call stdlib${ii}$_cunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1_${ik}$ ), &
                           b, ldb, work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**h * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_cunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, &
                           b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( wsize,KIND=sp)
           return
     end subroutine stdlib${ii}$_cgels

     module subroutine stdlib${ii}$_zgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info )
     !! ZGELS solves overdetermined or underdetermined complex linear systems
     !! involving an M-by-N matrix A, or its conjugate-transpose, using a QR
     !! or LQ factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
     !! an underdetermined system A**H * X = B.
     !! 4. If TRANS = 'C' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**H * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery, tpsd
           integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize
           real(dp) :: anrm, bignum, bnrm, smlnum
           ! Local Arrays 
           real(dp) :: rwork(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then
              info = -10_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ .or. info==-10_${ik}$ ) then
              tpsd = .true.
              if( stdlib_lsame( trans, 'N' ) )tpsd = .false.
              if( m>=n ) then
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LN', m, nrhs, n,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m, nrhs, n,-1_${ik}$ ) )
                 end if
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LC', n, nrhs, m,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) )
                 end if
              end if
              wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb )
              work( 1_${ik}$ ) = real( wsize,KIND=dp)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELS ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              call stdlib${ii}$_zlaset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb )
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              go to 50
           end if
           brow = m
           if( tpsd )brow = n
           bnrm = stdlib${ii}$_zlange( 'M', brow, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if( m>=n ) then
              ! compute qr factorization of a
              call stdlib${ii}$_zgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least n, optimally n*nb
              if( .not.tpsd ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs)
                 call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1_${ik}$ ), &
                           b, ldb, work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = n
              else
                 ! underdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,&
                            ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_zunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_zgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least m, optimally m*nb.
              if( .not.tpsd ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs)
                 call stdlib${ii}$_zunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1_${ik}$ ), &
                           b, ldb, work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**h * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_zunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, &
                           b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( wsize,KIND=dp)
           return
     end subroutine stdlib${ii}$_zgels

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$gels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info )
     !! ZGELS: solves overdetermined or underdetermined complex linear systems
     !! involving an M-by-N matrix A, or its conjugate-transpose, using a QR
     !! or LQ factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
     !! an underdetermined system A**H * X = B.
     !! 4. If TRANS = 'C' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**H * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery, tpsd
           integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize
           real(${ck}$) :: anrm, bignum, bnrm, smlnum
           ! Local Arrays 
           real(${ck}$) :: rwork(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           mn = min( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then
              info = -10_${ik}$
           end if
           ! figure out optimal block size
           if( info==0_${ik}$ .or. info==-10_${ik}$ ) then
              tpsd = .true.
              if( stdlib_lsame( trans, 'N' ) )tpsd = .false.
              if( m>=n ) then
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LN', m, nrhs, n,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m, nrhs, n,-1_${ik}$ ) )
                 end if
              else
                 nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                 if( tpsd ) then
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LC', n, nrhs, m,-1_${ik}$ ) )
                 else
                    nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) )
                 end if
              end if
              wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb )
              work( 1_${ik}$ ) = real( wsize,KIND=${ck}$)
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELS ', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
              call stdlib${ii}$_${ci}$laset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb )
              return
           end if
           ! get machine parameters
           smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
           bignum = one / smlnum
           call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              go to 50
           end if
           brow = m
           if( tpsd )brow = n
           bnrm = stdlib${ii}$_${ci}$lange( 'M', brow, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if( m>=n ) then
              ! compute qr factorization of a
              call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least n, optimally n*nb
              if( .not.tpsd ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1_${ik}$ ), &
                           b, ldb, work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = n
              else
                 ! underdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,&
                            ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info )
              ! workspace at least m, optimally m*nb.
              if( .not.tpsd ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$unmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1_${ik}$ ), &
                           b, ldb, work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**h * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$unmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,&
                            work( mn+1 ), lwork-mn,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, &
                           b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( wsize,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$gels

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond,rank, work, lwork, iwork, &
     !! SGELSD computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize 2-norm(| b - A*x |)
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The problem is solved in three steps:
     !! (1) Reduce the coefficient matrix A to bidiagonal form with
     !! Householder transformations, reducing the original problem
     !! into a "bidiagonal least squares problem" (BLS)
     !! (2) Solve the BLS using a divide and conquer approach.
     !! (3) Apply back all the Householder transformations to solve
     !! the original least squares problem.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
     !! The divide and conquer algorithm makes very mild assumptions about
     !! floating point arithmetic. It will work on machines with a guard
     !! digit in add/subtract, or on those binary machines without guard
     !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
     !! Cray-2. It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, &
                     maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd
           real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace.
           ! (note: comments in the code beginning "workspace:" describe the
           ! minimal amount of workspace needed at that point in the code,
           ! as well as the preferred amount for good performance.
           ! nb refers to the optimal block size for the immediately
           ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              liwork = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'SGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ )
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGELSD', ' ', m, n, nrhs, -1_${ik}$ )
                 nlvl = max( int( log( real( minmn,KIND=sp) / real( smlsiz + 1_${ik}$,KIND=sp) ) /log( &
                           two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ )
                 liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn
                 mm = m
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns.
                    mm = n
                    maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', 'LT',m, nrhs, n, -&
                              1_${ik}$ ) )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined.
                    maxwrk = max( maxwrk, 3_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'SGEBRD', ' ', mm, n, &
                              -1_${ik}$, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 3_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMBR','QLT', mm, nrhs, &
                              n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 3_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'SORMBR', 'PLN', n, &
                              nrhs, n, -1_${ik}$ ) )
                    wlalsd = 9_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + n*nrhs +( smlsiz + 1_${ik}$ )**2_${ik}$
                    maxwrk = max( maxwrk, 3_${ik}$*n + wlalsd )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, 3_${ik}$*n + wlalsd )
                 end if
                 if( n>m ) then
                    wlalsd = 9_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + m*nrhs +( smlsiz + 1_${ik}$ )**2_${ik}$
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                                 ! than rows.
                       maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'SGEBRD', ' ', m, m,&
                                  -1_${ik}$, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'SORMBR', 'QLT', m,&
                                  nrhs, m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'SORMBR', &
                                 'PLN', m, nrhs, m, -1_${ik}$ ) )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ','LT', n, nrhs, m,&
                                  -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + wlalsd )
           ! xxx: ensure the path 2a case below is triggered.  the workspace
           ! calculation should use queries for all routines eventually.
                       maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) )
                    else
                       ! path 2 - remaining underdetermined cases.
                       maxwrk = 3_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ )
                                 
                       maxwrk = max( maxwrk, 3_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMBR','QLT', m, nrhs,&
                                  n, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMBR','PLN', n, nrhs, m,&
                                  -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 3_${ik}$*m + wlalsd )
                    end if
                    minwrk = max( 3_${ik}$*m + nrhs, 3_${ik}$*m + m, 3_${ik}$*m + wlalsd )
                 end if
              end if
              minwrk = min( minwrk, maxwrk )
              work( 1_${ik}$ ) = maxwrk
              iwork( 1_${ik}$ ) = liwork
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGELSD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters.
           eps = stdlib${ii}$_slamch( 'P' )
           sfmin = stdlib${ii}$_slamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a if max entry outside range [smlnum,bignum].
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ )
              rank = 0_${ik}$
              go to 10
           end if
           ! scale b if max entry outside range [smlnum,bignum].
           bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! if m < n make sure certain entries of b are zero.
           if( m<n )call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
           ! overdetermined case.
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined.
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns.
                 mm = n
                 itau = 1_${ik}$
                 nwork = itau + n
                 ! compute a=q*r.
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, &
                           info )
                 ! multiply b by transpose(q).
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           nwork ), lwork-nwork+1, info )
                 ! zero out below r.
                 if( n>1_${ik}$ ) then
                    call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
                 end if
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              nwork = itaup + n
              ! bidiagonalize r in a.
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r.
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_slalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of r.
              call stdlib${ii}$_sormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m, wlalsd ) ) &
                     then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm.
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4_${ik}$*m+m*lda+&
                        wlalsd ) )ldwork = lda
              itau = 1_${ik}$
              nwork = m + 1_${ik}$
              ! compute a=l*q.
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info )
                        
              il = nwork
              ! copy l to work(il), zeroing out above its diagonal.
              call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              nwork = itaup + m
              ! bidiagonalize l in work(il).
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( nwork ),lwork-nwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l.
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_slalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of l.
              call stdlib${ii}$_sormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! zero out below first m rows of b.
              call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              nwork = itau + m
              ! multiply transpose(q) by b.
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )&
                        , lwork-nwork+1, info )
           else
              ! path 2 - remaining underdetermined cases.
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              nwork = itaup + m
              ! bidiagonalize a.
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors.
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_slalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of a.
              call stdlib${ii}$_sormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           end if
           ! undo scaling.
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           10 continue
           work( 1_${ik}$ ) = maxwrk
           iwork( 1_${ik}$ ) = liwork
           return
     end subroutine stdlib${ii}$_sgelsd

     module subroutine stdlib${ii}$_dgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, &
     !! DGELSD computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize 2-norm(| b - A*x |)
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The problem is solved in three steps:
     !! (1) Reduce the coefficient matrix A to bidiagonal form with
     !! Householder transformations, reducing the original problem
     !! into a "bidiagonal least squares problem" (BLS)
     !! (2) Solve the BLS using a divide and conquer approach.
     !! (3) Apply back all the Householder transformations to solve
     !! the original least squares problem.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
     !! The divide and conquer algorithm makes very mild assumptions about
     !! floating point arithmetic. It will work on machines with a guard
     !! digit in add/subtract, or on those binary machines without guard
     !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
     !! Cray-2. It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, &
                     maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd
           real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSD', ' ', m, n, nrhs, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'DGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ )
           ! compute workspace.
           ! (note: comments in the code beginning "workspace:" describe the
           ! minimal amount of workspace needed at that point in the code,
           ! as well as the preferred amount for good performance.
           ! nb refers to the optimal block size for the immediately
           ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           minwrk = 1_${ik}$
           liwork = 1_${ik}$
           minmn = max( 1_${ik}$, minmn )
           nlvl = max( int( log( real( minmn,KIND=dp) / real( smlsiz+1,KIND=dp) ) /log( two ),&
                     KIND=${ik}$) + 1_${ik}$, 0_${ik}$ )
           if( info==0_${ik}$ ) then
              maxwrk = 0_${ik}$
              liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn
              mm = m
              if( m>=n .and. m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns.
                 mm = n
                 maxwrk = max( maxwrk, n+n*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) )
                           
                 maxwrk = max( maxwrk, n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n, -1_${ik}$ ) )
                           
              end if
              if( m>=n ) then
                 ! path 1 - overdetermined or exactly determined.
                 maxwrk = max( maxwrk, 3_${ik}$*n+( mm+n )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', mm, n, -1_${ik}$, -&
                           1_${ik}$ ) )
                 maxwrk = max( maxwrk, 3_${ik}$*n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', mm, nrhs, n, -&
                           1_${ik}$ ) )
                 maxwrk = max( maxwrk, 3_${ik}$*n+( n-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, n, &
                           -1_${ik}$ ) )
                 wlalsd = 9_${ik}$*n+2*n*smlsiz+8*n*nlvl+n*nrhs+(smlsiz+1)**2_${ik}$
                 maxwrk = max( maxwrk, 3_${ik}$*n+wlalsd )
                 minwrk = max( 3_${ik}$*n+mm, 3_${ik}$*n+nrhs, 3_${ik}$*n+wlalsd )
              end if
              if( n>m ) then
                 wlalsd = 9_${ik}$*m+2*m*smlsiz+8*m*nlvl+m*nrhs+(smlsiz+1)**2_${ik}$
                 if( n>=mnthr ) then
                    ! path 2a - underdetermined, with many more columns
                    ! than rows.
                    maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    maxwrk = max( maxwrk, m*m+4*m+2*m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, m, -1_${ik}$, -&
                              1_${ik}$ ) )
                    maxwrk = max( maxwrk, m*m+4*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs,&
                               m, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, m*m+4*m+( m-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', m, &
                              nrhs, m, -1_${ik}$ ) )
                    if( nrhs>1_${ik}$ ) then
                       maxwrk = max( maxwrk, m*m+m+m*nrhs )
                    else
                       maxwrk = max( maxwrk, m*m+2*m )
                    end if
                    maxwrk = max( maxwrk, m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m, -1_${ik}$ &
                              ) )
                    maxwrk = max( maxwrk, m*m+4*m+wlalsd )
           ! xxx: ensure the path 2a case below is triggered.  the workspace
           ! calculation should use queries for all routines eventually.
                    maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) )
                 else
                    ! path 2 - remaining underdetermined cases.
                    maxwrk = 3_${ik}$*m + ( n+m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n,-1_${ik}$, -1_${ik}$ )
                    maxwrk = max( maxwrk, 3_${ik}$*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs, n, &
                              -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 3_${ik}$*m+m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, m, -1_${ik}$ &
                              ) )
                    maxwrk = max( maxwrk, 3_${ik}$*m+wlalsd )
                 end if
                 minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+m, 3_${ik}$*m+wlalsd )
              end if
              minwrk = min( minwrk, maxwrk )
              work( 1_${ik}$ ) = maxwrk
              iwork( 1_${ik}$ ) = liwork
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSD', -info )
              return
           else if( lquery ) then
              go to 10
           end if
           ! quick return if possible.
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters.
           eps = stdlib${ii}$_dlamch( 'P' )
           sfmin = stdlib${ii}$_dlamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a if max entry outside range [smlnum,bignum].
           anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ )
              rank = 0_${ik}$
              go to 10
           end if
           ! scale b if max entry outside range [smlnum,bignum].
           bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! if m < n make sure certain entries of b are zero.
           if( m<n )call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
           ! overdetermined case.
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined.
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns.
                 mm = n
                 itau = 1_${ik}$
                 nwork = itau + n
                 ! compute a=q*r.
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, &
                           info )
                 ! multiply b by transpose(q).
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           nwork ), lwork-nwork+1, info )
                 ! zero out below r.
                 if( n>1_${ik}$ ) then
                    call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
                 end if
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              nwork = itaup + n
              ! bidiagonalize r in a.
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r.
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_dlalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of r.
              call stdlib${ii}$_dormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m, wlalsd ) ) &
                     then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm.
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4_${ik}$*m+m*lda+&
                        wlalsd ) )ldwork = lda
              itau = 1_${ik}$
              nwork = m + 1_${ik}$
              ! compute a=l*q.
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info )
                        
              il = nwork
              ! copy l to work(il), zeroing out above its diagonal.
              call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              nwork = itaup + m
              ! bidiagonalize l in work(il).
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( nwork ),lwork-nwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l.
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_dlalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of l.
              call stdlib${ii}$_dormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! zero out below first m rows of b.
              call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              nwork = itau + m
              ! multiply transpose(q) by b.
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )&
                        , lwork-nwork+1, info )
           else
              ! path 2 - remaining underdetermined cases.
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              nwork = itaup + m
              ! bidiagonalize a.
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors.
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_dlalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of a.
              call stdlib${ii}$_dormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           end if
           ! undo scaling.
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           10 continue
           work( 1_${ik}$ ) = maxwrk
           iwork( 1_${ik}$ ) = liwork
           return
     end subroutine stdlib${ii}$_dgelsd

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$gelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, &
     !! DGELSD: computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize 2-norm(| b - A*x |)
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The problem is solved in three steps:
     !! (1) Reduce the coefficient matrix A to bidiagonal form with
     !! Householder transformations, reducing the original problem
     !! into a "bidiagonal least squares problem" (BLS)
     !! (2) Solve the BLS using a divide and conquer approach.
     !! (3) Apply back all the Householder transformations to solve
     !! the original least squares problem.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
     !! The divide and conquer algorithm makes very mild assumptions about
     !! floating point arithmetic. It will work on machines with a guard
     !! digit in add/subtract, or on those binary machines without guard
     !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
     !! Cray-2. It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${rk}$), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, &
                     maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd
           real(${rk}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSD', ' ', m, n, nrhs, -1_${ik}$ )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'DGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ )
           ! compute workspace.
           ! (note: comments in the code beginning "workspace:" describe the
           ! minimal amount of workspace needed at that point in the code,
           ! as well as the preferred amount for good performance.
           ! nb refers to the optimal block size for the immediately
           ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           minwrk = 1_${ik}$
           liwork = 1_${ik}$
           minmn = max( 1_${ik}$, minmn )
           nlvl = max( int( log( real( minmn,KIND=${rk}$) / real( smlsiz+1,KIND=${rk}$) ) /log( two ),&
                     KIND=${ik}$) + 1_${ik}$, 0_${ik}$ )
           if( info==0_${ik}$ ) then
              maxwrk = 0_${ik}$
              liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn
              mm = m
              if( m>=n .and. m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns.
                 mm = n
                 maxwrk = max( maxwrk, n+n*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) )
                           
                 maxwrk = max( maxwrk, n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n, -1_${ik}$ ) )
                           
              end if
              if( m>=n ) then
                 ! path 1 - overdetermined or exactly determined.
                 maxwrk = max( maxwrk, 3_${ik}$*n+( mm+n )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', mm, n, -1_${ik}$, -&
                           1_${ik}$ ) )
                 maxwrk = max( maxwrk, 3_${ik}$*n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', mm, nrhs, n, -&
                           1_${ik}$ ) )
                 maxwrk = max( maxwrk, 3_${ik}$*n+( n-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, n, &
                           -1_${ik}$ ) )
                 wlalsd = 9_${ik}$*n+2*n*smlsiz+8*n*nlvl+n*nrhs+(smlsiz+1)**2_${ik}$
                 maxwrk = max( maxwrk, 3_${ik}$*n+wlalsd )
                 minwrk = max( 3_${ik}$*n+mm, 3_${ik}$*n+nrhs, 3_${ik}$*n+wlalsd )
              end if
              if( n>m ) then
                 wlalsd = 9_${ik}$*m+2*m*smlsiz+8*m*nlvl+m*nrhs+(smlsiz+1)**2_${ik}$
                 if( n>=mnthr ) then
                    ! path 2a - underdetermined, with many more columns
                    ! than rows.
                    maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ )
                    maxwrk = max( maxwrk, m*m+4*m+2*m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, m, -1_${ik}$, -&
                              1_${ik}$ ) )
                    maxwrk = max( maxwrk, m*m+4*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs,&
                               m, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, m*m+4*m+( m-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', m, &
                              nrhs, m, -1_${ik}$ ) )
                    if( nrhs>1_${ik}$ ) then
                       maxwrk = max( maxwrk, m*m+m+m*nrhs )
                    else
                       maxwrk = max( maxwrk, m*m+2*m )
                    end if
                    maxwrk = max( maxwrk, m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m, -1_${ik}$ &
                              ) )
                    maxwrk = max( maxwrk, m*m+4*m+wlalsd )
           ! xxx: ensure the path 2a case below is triggered.  the workspace
           ! calculation should use queries for all routines eventually.
                    maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) )
                 else
                    ! path 2 - remaining underdetermined cases.
                    maxwrk = 3_${ik}$*m + ( n+m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n,-1_${ik}$, -1_${ik}$ )
                    maxwrk = max( maxwrk, 3_${ik}$*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs, n, &
                              -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 3_${ik}$*m+m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, m, -1_${ik}$ &
                              ) )
                    maxwrk = max( maxwrk, 3_${ik}$*m+wlalsd )
                 end if
                 minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+m, 3_${ik}$*m+wlalsd )
              end if
              minwrk = min( minwrk, maxwrk )
              work( 1_${ik}$ ) = maxwrk
              iwork( 1_${ik}$ ) = liwork
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSD', -info )
              return
           else if( lquery ) then
              go to 10
           end if
           ! quick return if possible.
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters.
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           sfmin = stdlib${ii}$_${ri}$lamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_${ri}$labad( smlnum, bignum )
           ! scale a if max entry outside range [smlnum,bignum].
           anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_${ri}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ )
              rank = 0_${ik}$
              go to 10
           end if
           ! scale b if max entry outside range [smlnum,bignum].
           bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! if m < n make sure certain entries of b are zero.
           if( m<n )call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
           ! overdetermined case.
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined.
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns.
                 mm = n
                 itau = 1_${ik}$
                 nwork = itau + n
                 ! compute a=q*r.
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, &
                           info )
                 ! multiply b by transpose(q).
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           nwork ), lwork-nwork+1, info )
                 ! zero out below r.
                 if( n>1_${ik}$ ) then
                    call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
                 end if
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              nwork = itaup + n
              ! bidiagonalize r in a.
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r.
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_${ri}$lalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of r.
              call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m, wlalsd ) ) &
                     then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm.
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4_${ik}$*m+m*lda+&
                        wlalsd ) )ldwork = lda
              itau = 1_${ik}$
              nwork = m + 1_${ik}$
              ! compute a=l*q.
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info )
                        
              il = nwork
              ! copy l to work(il), zeroing out above its diagonal.
              call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              nwork = itaup + m
              ! bidiagonalize l in work(il).
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( nwork ),lwork-nwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l.
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_${ri}$lalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of l.
              call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! zero out below first m rows of b.
              call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              nwork = itau + m
              ! multiply transpose(q) by b.
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )&
                        , lwork-nwork+1, info )
           else
              ! path 2 - remaining underdetermined cases.
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              nwork = itaup + m
              ! bidiagonalize a.
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors.
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_${ri}$lalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( &
                        nwork ), iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of a.
              call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           end if
           ! undo scaling.
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           10 continue
           work( 1_${ik}$ ) = maxwrk
           iwork( 1_${ik}$ ) = liwork
           return
     end subroutine stdlib${ii}$_${ri}$gelsd

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! CGELSD computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize 2-norm(| b - A*x |)
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The problem is solved in three steps:
     !! (1) Reduce the coefficient matrix A to bidiagonal form with
     !! Householder transformations, reducing the original problem
     !! into a "bidiagonal least squares problem" (BLS)
     !! (2) Solve the BLS using a divide and conquer approach.
     !! (3) Apply back all the Householder transformations to solve
     !! the original least squares problem.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
     !! The divide and conquer algorithm makes very mild assumptions about
     !! floating point arithmetic. It will work on machines with a guard
     !! digit in add/subtract, or on those binary machines without guard
     !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
     !! Cray-2. It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
               iwork, info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(out) :: rwork(*), s(*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, &
                     maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz
           real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace.
           ! (note: comments in the code beginning "workspace:" describe the
           ! minimal amount of workspace needed at that point in the code,
           ! as well as the preferred amount for good performance.
           ! nb refers to the optimal block size for the immediately
           ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              liwork = 1_${ik}$
              lrwork = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'CGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ )
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGELSD', ' ', m, n, nrhs, -1_${ik}$ )
                 nlvl = max( int( log( real( minmn,KIND=sp) / real( smlsiz + 1_${ik}$,KIND=sp) ) /log( &
                           two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ )
                 liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn
                 mm = m
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns.
                    mm = n
                    maxwrk = max( maxwrk, n*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC', m,nrhs, n, -1_${ik}$ ) )
                              
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined.
                    lrwork = 10_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(&
                              1_${ik}$+nrhs) + 2_${ik}$*nrhs )
                    maxwrk = max( maxwrk, 2_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'CGEBRD', ' ', mm, n, &
                              -1_${ik}$, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMBR','QLC', mm, nrhs, &
                              n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'CUNMBR', 'PLN', n, &
                              nrhs, n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + n*nrhs )
                    minwrk = max( 2_${ik}$*n + mm, 2_${ik}$*n + n*nrhs )
                 end if
                 if( n>m ) then
                    lrwork = 10_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(&
                              1_${ik}$+nrhs) + 2_${ik}$*nrhs )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                                 ! than rows.
                       maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'CGEBRD', ' ', m, m,&
                                  -1_${ik}$, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'CUNMBR', 'QLC', m,&
                                  nrhs, m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'CUNMLQ', &
                                 'LC', n, nrhs, m, -1_${ik}$ ) )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + m*nrhs )
           ! xxx: ensure the path 2a case below is triggered.  the workspace
           ! calculation should use queries for all routines eventually.
                       maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) )
                    else
                       ! path 2 - underdetermined.
                       maxwrk = 2_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ )
                                 
                       maxwrk = max( maxwrk, 2_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMBR','QLC', m, nrhs,&
                                  m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 2_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMBR','PLN', n, nrhs, m,&
                                  -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 2_${ik}$*m + m*nrhs )
                    end if
                    minwrk = max( 2_${ik}$*m + n, 2_${ik}$*m + m*nrhs )
                 end if
              end if
              minwrk = min( minwrk, maxwrk )
              work( 1_${ik}$ ) = maxwrk
              iwork( 1_${ik}$ ) = liwork
              rwork( 1_${ik}$ ) = lrwork
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGELSD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters.
           eps = stdlib${ii}$_slamch( 'P' )
           sfmin = stdlib${ii}$_slamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a if max entry outside range [smlnum,bignum].
           anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ )
              rank = 0_${ik}$
              go to 10
           end if
           ! scale b if max entry outside range [smlnum,bignum].
           bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! if m < n make sure b(m+1:n,:) = 0
           if( m<n )call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
           ! overdetermined case.
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined.
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 nwork = itau + n
                 ! compute a=q*r.
                 ! (rworkspace: need n)
                 ! (cworkspace: need n, prefer n*nb)
                 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, &
                           info )
                 ! multiply b by transpose(q).
                 ! (rworkspace: need n)
                 ! (cworkspace: need nrhs, prefer nrhs*nb)
                 call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           nwork ), lwork-nwork+1, info )
                 ! zero out below r.
                 if( n>1_${ik}$ ) then
                    call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
                 end if
              end if
              itauq = 1_${ik}$
              itaup = itauq + n
              nwork = itaup + n
              ie = 1_${ik}$
              nrwork = ie + n
              ! bidiagonalize r in a.
              ! (rworkspace: need n)
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              call stdlib${ii}$_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r.
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_clalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of r.
              call stdlib${ii}$_cunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm.
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              nwork = m + 1_${ik}$
              ! compute a=l*q.
              ! (cworkspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info )
                        
              il = nwork
              ! copy l to work(il), zeroing out above its diagonal.
              call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              itauq = il + ldwork*m
              itaup = itauq + m
              nwork = itaup + m
              ie = 1_${ik}$
              nrwork = ie + m
              ! bidiagonalize l in work(il).
              ! (rworkspace: need m)
              ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( nwork ),lwork-nwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l.
              ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_clalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of l.
              call stdlib${ii}$_cunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! zero out below first m rows of b.
              call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              nwork = itau + m
              ! multiply transpose(q) by b.
              ! (cworkspace: need nrhs, prefer nrhs*nb)
              call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )&
                        , lwork-nwork+1, info )
           else
              ! path 2 - remaining underdetermined cases.
              itauq = 1_${ik}$
              itaup = itauq + m
              nwork = itaup + m
              ie = 1_${ik}$
              nrwork = ie + m
              ! bidiagonalize a.
              ! (rworkspace: need m)
              ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb)
              call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors.
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_clalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of a.
              call stdlib${ii}$_cunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           end if
           ! undo scaling.
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           10 continue
           work( 1_${ik}$ ) = maxwrk
           iwork( 1_${ik}$ ) = liwork
           rwork( 1_${ik}$ ) = lrwork
           return
     end subroutine stdlib${ii}$_cgelsd

     module subroutine stdlib${ii}$_zgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! ZGELSD computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize 2-norm(| b - A*x |)
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The problem is solved in three steps:
     !! (1) Reduce the coefficient matrix A to bidiagonal form with
     !! Householder transformations, reducing the original problem
     !! into a "bidiagonal least squares problem" (BLS)
     !! (2) Solve the BLS using a divide and conquer approach.
     !! (3) Apply back all the Householder transformations to solve
     !! the original least squares problem.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
     !! The divide and conquer algorithm makes very mild assumptions about
     !! floating point arithmetic. It will work on machines with a guard
     !! digit in add/subtract, or on those binary machines without guard
     !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
     !! Cray-2. It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
               iwork, info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(out) :: rwork(*), s(*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, &
                     maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz
           real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace.
           ! (note: comments in the code beginning "workspace:" describe the
           ! minimal amount of workspace needed at that point in the code,
           ! as well as the preferred amount for good performance.
           ! nb refers to the optimal block size for the immediately
           ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              liwork = 1_${ik}$
              lrwork = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'ZGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ )
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSD', ' ', m, n, nrhs, -1_${ik}$ )
                 nlvl = max( int( log( real( minmn,KIND=dp) / real( smlsiz + 1_${ik}$,KIND=dp) ) /log( &
                           two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ )
                 liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn
                 mm = m
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns.
                    mm = n
                    maxwrk = max( maxwrk, n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m,nrhs, n, -1_${ik}$ ) )
                              
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined.
                    lrwork = 10_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(&
                              1_${ik}$+nrhs) + 2_${ik}$*nrhs )
                    maxwrk = max( maxwrk, 2_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', mm, n, &
                              -1_${ik}$, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', mm, nrhs, &
                              n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'PLN', n, &
                              nrhs, n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + n*nrhs )
                    minwrk = max( 2_${ik}$*n + mm, 2_${ik}$*n + n*nrhs )
                 end if
                 if( n>m ) then
                    lrwork = 10_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(&
                              1_${ik}$+nrhs) + 2_${ik}$*nrhs )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                                 ! than rows.
                       maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', m, m,&
                                  -1_${ik}$, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'QLC', m,&
                                  nrhs, m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMLQ', &
                                 'LC', n, nrhs, m, -1_${ik}$ ) )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + m*nrhs )
           ! xxx: ensure the path 2a case below is triggered.  the workspace
           ! calculation should use queries for all routines eventually.
                       maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) )
                    else
                       ! path 2 - underdetermined.
                       maxwrk = 2_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ )
                                 
                       maxwrk = max( maxwrk, 2_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', m, nrhs,&
                                  m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 2_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','PLN', n, nrhs, m,&
                                  -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 2_${ik}$*m + m*nrhs )
                    end if
                    minwrk = max( 2_${ik}$*m + n, 2_${ik}$*m + m*nrhs )
                 end if
              end if
              minwrk = min( minwrk, maxwrk )
              work( 1_${ik}$ ) = maxwrk
              iwork( 1_${ik}$ ) = liwork
              rwork( 1_${ik}$ ) = lrwork
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters.
           eps = stdlib${ii}$_dlamch( 'P' )
           sfmin = stdlib${ii}$_dlamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a if max entry outside range [smlnum,bignum].
           anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ )
              rank = 0_${ik}$
              go to 10
           end if
           ! scale b if max entry outside range [smlnum,bignum].
           bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! if m < n make sure b(m+1:n,:) = 0
           if( m<n )call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
           ! overdetermined case.
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined.
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 nwork = itau + n
                 ! compute a=q*r.
                 ! (rworkspace: need n)
                 ! (cworkspace: need n, prefer n*nb)
                 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, &
                           info )
                 ! multiply b by transpose(q).
                 ! (rworkspace: need n)
                 ! (cworkspace: need nrhs, prefer nrhs*nb)
                 call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           nwork ), lwork-nwork+1, info )
                 ! zero out below r.
                 if( n>1_${ik}$ ) then
                    call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
                 end if
              end if
              itauq = 1_${ik}$
              itaup = itauq + n
              nwork = itaup + n
              ie = 1_${ik}$
              nrwork = ie + n
              ! bidiagonalize r in a.
              ! (rworkspace: need n)
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              call stdlib${ii}$_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r.
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_zlalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of r.
              call stdlib${ii}$_zunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm.
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              nwork = m + 1_${ik}$
              ! compute a=l*q.
              ! (cworkspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info )
                        
              il = nwork
              ! copy l to work(il), zeroing out above its diagonal.
              call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              itauq = il + ldwork*m
              itaup = itauq + m
              nwork = itaup + m
              ie = 1_${ik}$
              nrwork = ie + m
              ! bidiagonalize l in work(il).
              ! (rworkspace: need m)
              ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( nwork ),lwork-nwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l.
              ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_zlalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of l.
              call stdlib${ii}$_zunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! zero out below first m rows of b.
              call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              nwork = itau + m
              ! multiply transpose(q) by b.
              ! (cworkspace: need nrhs, prefer nrhs*nb)
              call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )&
                        , lwork-nwork+1, info )
           else
              ! path 2 - remaining underdetermined cases.
              itauq = 1_${ik}$
              itaup = itauq + m
              nwork = itaup + m
              ie = 1_${ik}$
              nrwork = ie + m
              ! bidiagonalize a.
              ! (rworkspace: need m)
              ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb)
              call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors.
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_zlalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of a.
              call stdlib${ii}$_zunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           end if
           ! undo scaling.
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           10 continue
           work( 1_${ik}$ ) = maxwrk
           iwork( 1_${ik}$ ) = liwork
           rwork( 1_${ik}$ ) = lrwork
           return
     end subroutine stdlib${ii}$_zgelsd

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$gelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! ZGELSD: computes the minimum-norm solution to a real linear least
     !! squares problem:
     !! minimize 2-norm(| b - A*x |)
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
     !! The problem is solved in three steps:
     !! (1) Reduce the coefficient matrix A to bidiagonal form with
     !! Householder transformations, reducing the original problem
     !! into a "bidiagonal least squares problem" (BLS)
     !! (2) Solve the BLS using a divide and conquer approach.
     !! (3) Apply back all the Householder transformations to solve
     !! the original least squares problem.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
     !! The divide and conquer algorithm makes very mild assumptions about
     !! floating point arithmetic. It will work on machines with a guard
     !! digit in add/subtract, or on those binary machines without guard
     !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
     !! Cray-2. It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
               iwork, info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${ck}$), intent(in) :: rcond
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(${ck}$), intent(out) :: rwork(*), s(*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, &
                     maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz
           real(${ck}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace.
           ! (note: comments in the code beginning "workspace:" describe the
           ! minimal amount of workspace needed at that point in the code,
           ! as well as the preferred amount for good performance.
           ! nb refers to the optimal block size for the immediately
           ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              liwork = 1_${ik}$
              lrwork = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'ZGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ )
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSD', ' ', m, n, nrhs, -1_${ik}$ )
                 nlvl = max( int( log( real( minmn,KIND=${ck}$) / real( smlsiz + 1_${ik}$,KIND=${ck}$) ) /log( &
                           two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ )
                 liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn
                 mm = m
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns.
                    mm = n
                    maxwrk = max( maxwrk, n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m,nrhs, n, -1_${ik}$ ) )
                              
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined.
                    lrwork = 10_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(&
                              1_${ik}$+nrhs) + 2_${ik}$*nrhs )
                    maxwrk = max( maxwrk, 2_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', mm, n, &
                              -1_${ik}$, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', mm, nrhs, &
                              n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'PLN', n, &
                              nrhs, n, -1_${ik}$ ) )
                    maxwrk = max( maxwrk, 2_${ik}$*n + n*nrhs )
                    minwrk = max( 2_${ik}$*n + mm, 2_${ik}$*n + n*nrhs )
                 end if
                 if( n>m ) then
                    lrwork = 10_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(&
                              1_${ik}$+nrhs) + 2_${ik}$*nrhs )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                                 ! than rows.
                       maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', m, m,&
                                  -1_${ik}$, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'QLC', m,&
                                  nrhs, m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMLQ', &
                                 'LC', n, nrhs, m, -1_${ik}$ ) )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + m*nrhs )
           ! xxx: ensure the path 2a case below is triggered.  the workspace
           ! calculation should use queries for all routines eventually.
                       maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) )
                    else
                       ! path 2 - underdetermined.
                       maxwrk = 2_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ )
                                 
                       maxwrk = max( maxwrk, 2_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', m, nrhs,&
                                  m, -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 2_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','PLN', n, nrhs, m,&
                                  -1_${ik}$ ) )
                       maxwrk = max( maxwrk, 2_${ik}$*m + m*nrhs )
                    end if
                    minwrk = max( 2_${ik}$*m + n, 2_${ik}$*m + m*nrhs )
                 end if
              end if
              minwrk = min( minwrk, maxwrk )
              work( 1_${ik}$ ) = maxwrk
              iwork( 1_${ik}$ ) = liwork
              rwork( 1_${ik}$ ) = lrwork
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -12_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters.
           eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
           sfmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum )
           ! scale a if max entry outside range [smlnum,bignum].
           anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_${c2ri(ci)}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ )
              rank = 0_${ik}$
              go to 10
           end if
           ! scale b if max entry outside range [smlnum,bignum].
           bnrm = stdlib${ii}$_${ci}$lange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum.
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum.
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! if m < n make sure b(m+1:n,:) = 0
           if( m<n )call stdlib${ii}$_${ci}$laset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
           ! overdetermined case.
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined.
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 nwork = itau + n
                 ! compute a=q*r.
                 ! (rworkspace: need n)
                 ! (cworkspace: need n, prefer n*nb)
                 call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, &
                           info )
                 ! multiply b by transpose(q).
                 ! (rworkspace: need n)
                 ! (cworkspace: need nrhs, prefer nrhs*nb)
                 call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           nwork ), lwork-nwork+1, info )
                 ! zero out below r.
                 if( n>1_${ik}$ ) then
                    call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
                 end if
              end if
              itauq = 1_${ik}$
              itaup = itauq + n
              nwork = itaup + n
              ie = 1_${ik}$
              nrwork = ie + n
              ! bidiagonalize r in a.
              ! (rworkspace: need n)
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              call stdlib${ii}$_${ci}$gebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r.
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_${ci}$lalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of r.
              call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm.
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              nwork = m + 1_${ik}$
              ! compute a=l*q.
              ! (cworkspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info )
                        
              il = nwork
              ! copy l to work(il), zeroing out above its diagonal.
              call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              itauq = il + ldwork*m
              itaup = itauq + m
              nwork = itaup + m
              ie = 1_${ik}$
              nrwork = ie + m
              ! bidiagonalize l in work(il).
              ! (rworkspace: need m)
              ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_${ci}$gebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( nwork ),lwork-nwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l.
              ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_${ci}$lalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of l.
              call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, &
                        ldb, work( nwork ),lwork-nwork+1, info )
              ! zero out below first m rows of b.
              call stdlib${ii}$_${ci}$laset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              nwork = itau + m
              ! multiply transpose(q) by b.
              ! (cworkspace: need nrhs, prefer nrhs*nb)
              call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )&
                        , lwork-nwork+1, info )
           else
              ! path 2 - remaining underdetermined cases.
              itauq = 1_${ik}$
              itaup = itauq + m
              nwork = itaup + m
              ie = 1_${ik}$
              nrwork = ie + m
              ! bidiagonalize a.
              ! (rworkspace: need m)
              ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb)
              call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         nwork ), lwork-nwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors.
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
              ! solve the bidiagonal least squares problem.
              call stdlib${ii}$_${ci}$lalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( &
                        nwork ), rwork( nrwork ),iwork, info )
              if( info/=0_${ik}$ ) then
                 go to 10
              end if
              ! multiply b by right bidiagonalizing vectors of a.
              call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( &
                        nwork ), lwork-nwork+1, info )
           end if
           ! undo scaling.
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           10 continue
           work( 1_${ik}$ ) = maxwrk
           iwork( 1_${ik}$ ) = liwork
           rwork( 1_${ik}$ ) = lrwork
           return
     end subroutine stdlib${ii}$_${ci}$gelsd

#:endif
#:endfor



     module subroutine stdlib${ii}$_sgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info )
     !! SGETSLS solves overdetermined or underdetermined real linear systems
     !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
     !! an undetermined system A**T * X = B.
     !! 4. If TRANS = 'T' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, tran
           integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, &
                     lw2, wsizeo, wsizem, info2
           real(sp) :: anrm, bignum, bnrm, smlnum, tq(5_${ik}$), workq(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           maxmn = max( m, n )
           tran  = stdlib_lsame( trans, 'T' )
           lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! determine the optimum and minimum lwork
            if( m>=n ) then
              call stdlib${ii}$_sgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_sgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_sgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_sgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            else
              call stdlib${ii}$_sgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_sgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_sgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_sgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm  = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            end if
            if( ( lwork<wsizem ).and.( .not.lquery ) ) then
               info = -10_${ik}$
            end if
            work( 1_${ik}$ ) = real( wsizeo,KIND=sp)
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'SGETSLS', -info )
             return
           end if
           if( lquery ) then
             if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=sp)
             return
           end if
           if( lwork<wsizeo ) then
             lw1 = tszm
             lw2 = lwm
           else
             lw1 = tszo
             lw2 = lwo
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
                call stdlib${ii}$_slaset( 'FULL', max( m, n ), nrhs, zero, zero,b, ldb )
                return
           end if
           ! get machine parameters
            smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' )
            bignum = one / smlnum
            call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_slaset( 'F', maxmn, nrhs, zero, zero, b, ldb )
              go to 50
           end if
           brow = m
           if ( tran ) then
             brow = n
           end if
           bnrm = stdlib${ii}$_slange( 'M', brow, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if ( m>=n ) then
              ! compute qr factorization of a
             call stdlib${ii}$_sgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
             if ( .not.tran ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
               call stdlib${ii}$_sgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(&
                          1_${ik}$ ), lw2,info )
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
               call stdlib${ii}$_strtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info )
               if( info>0_${ik}$ ) then
                 return
               end if
               scllen = n
             else
                 ! overdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_strtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_sgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_sgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
              ! workspace at least m, optimally m*nb.
              if( .not.tran ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_strtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_sgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_sgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_strtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
             call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
             call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
             call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
             call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( tszo + lwo,KIND=sp)
           return
     end subroutine stdlib${ii}$_sgetsls

     module subroutine stdlib${ii}$_dgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info )
     !! DGETSLS solves overdetermined or underdetermined real linear systems
     !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
     !! an undetermined system A**T * X = B.
     !! 4. If TRANS = 'T' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, tran
           integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, &
                     lw2, wsizeo, wsizem, info2
           real(dp) :: anrm, bignum, bnrm, smlnum, tq(5_${ik}$), workq(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           maxmn = max( m, n )
           tran  = stdlib_lsame( trans, 'T' )
           lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! determine the optimum and minimum lwork
            if( m>=n ) then
              call stdlib${ii}$_dgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_dgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_dgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_dgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            else
              call stdlib${ii}$_dgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_dgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_dgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_dgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm  = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            end if
            if( ( lwork<wsizem ).and.( .not.lquery ) ) then
               info = -10_${ik}$
            end if
            work( 1_${ik}$ ) = real( wsizeo,KIND=dp)
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGETSLS', -info )
             return
           end if
           if( lquery ) then
             if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=dp)
             return
           end if
           if( lwork<wsizeo ) then
             lw1 = tszm
             lw2 = lwm
           else
             lw1 = tszo
             lw2 = lwo
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
                call stdlib${ii}$_dlaset( 'FULL', max( m, n ), nrhs, zero, zero,b, ldb )
                return
           end if
           ! get machine parameters
            smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' )
            bignum = one / smlnum
            call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_dlaset( 'F', maxmn, nrhs, zero, zero, b, ldb )
              go to 50
           end if
           brow = m
           if ( tran ) then
             brow = n
           end if
           bnrm = stdlib${ii}$_dlange( 'M', brow, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if ( m>=n ) then
              ! compute qr factorization of a
             call stdlib${ii}$_dgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
             if ( .not.tran ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
               call stdlib${ii}$_dgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(&
                          1_${ik}$ ), lw2,info )
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
               call stdlib${ii}$_dtrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info )
               if( info>0_${ik}$ ) then
                 return
               end if
               scllen = n
             else
                 ! overdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_dgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_dgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
              ! workspace at least m, optimally m*nb.
              if( .not.tran ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_dgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_dgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_dtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
             call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
             call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
             call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
             call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( tszo + lwo,KIND=dp)
           return
     end subroutine stdlib${ii}$_dgetsls

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$getsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info )
     !! DGETSLS: solves overdetermined or underdetermined real linear systems
     !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
     !! an undetermined system A**T * X = B.
     !! 4. If TRANS = 'T' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, tran
           integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, &
                     lw2, wsizeo, wsizem, info2
           real(${rk}$) :: anrm, bignum, bnrm, smlnum, tq(5_${ik}$), workq(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           maxmn = max( m, n )
           tran  = stdlib_lsame( trans, 'T' )
           lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! determine the optimum and minimum lwork
            if( m>=n ) then
              call stdlib${ii}$_${ri}$geqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ri}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_${ri}$geqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ri}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            else
              call stdlib${ii}$_${ri}$gelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ri}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_${ri}$gelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ri}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm  = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            end if
            if( ( lwork<wsizem ).and.( .not.lquery ) ) then
               info = -10_${ik}$
            end if
            work( 1_${ik}$ ) = real( wsizeo,KIND=${rk}$)
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'DGETSLS', -info )
             return
           end if
           if( lquery ) then
             if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=${rk}$)
             return
           end if
           if( lwork<wsizeo ) then
             lw1 = tszm
             lw2 = lwm
           else
             lw1 = tszo
             lw2 = lwo
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
                call stdlib${ii}$_${ri}$laset( 'FULL', max( m, n ), nrhs, zero, zero,b, ldb )
                return
           end if
           ! get machine parameters
            smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / stdlib${ii}$_${ri}$lamch( 'P' )
            bignum = one / smlnum
            call stdlib${ii}$_${ri}$labad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ri}$laset( 'F', maxmn, nrhs, zero, zero, b, ldb )
              go to 50
           end if
           brow = m
           if ( tran ) then
             brow = n
           end if
           bnrm = stdlib${ii}$_${ri}$lange( 'M', brow, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if ( m>=n ) then
              ! compute qr factorization of a
             call stdlib${ii}$_${ri}$geqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
             if ( .not.tran ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
               call stdlib${ii}$_${ri}$gemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(&
                          1_${ik}$ ), lw2,info )
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
               call stdlib${ii}$_${ri}$trtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info )
               if( info>0_${ik}$ ) then
                 return
               end if
               scllen = n
             else
                 ! overdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = zero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$gemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_${ri}$gelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
              ! workspace at least m, optimally m*nb.
              if( .not.tran ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = zero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$gemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ri}$gemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ri}$trtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, &
                           info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
             call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
             call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
             call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
             call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( tszo + lwo,KIND=${rk}$)
           return
     end subroutine stdlib${ii}$_${ri}$getsls

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info )
     !! CGETSLS solves overdetermined or underdetermined complex linear systems
     !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
     !! an undetermined system A**T * X = B.
     !! 4. If TRANS = 'C' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery, tran
           integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, &
                     lw2, wsizeo, wsizem, info2
           real(sp) :: anrm, bignum, bnrm, smlnum, dum(1_${ik}$)
           complex(sp) :: tq(5_${ik}$), workq(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           maxmn = max( m, n )
           tran  = stdlib_lsame( trans, 'C' )
           lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! determine the optimum and minimum lwork
            if( m>=n ) then
              call stdlib${ii}$_cgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_cgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_cgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_cgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            else
              call stdlib${ii}$_cgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_cgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_cgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_cgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm  = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            end if
            if( ( lwork<wsizem ).and.( .not.lquery ) ) then
               info = -10_${ik}$
            end if
            work( 1_${ik}$ ) = real( wsizeo,KIND=sp)
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'CGETSLS', -info )
             return
           end if
           if( lquery ) then
             if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=sp)
             return
           end if
           if( lwork<wsizeo ) then
             lw1 = tszm
             lw2 = lwm
           else
             lw1 = tszo
             lw2 = lwo
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
                call stdlib${ii}$_claset( 'FULL', max( m, n ), nrhs, czero, czero,b, ldb )
                return
           end if
           ! get machine parameters
            smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' )
            bignum = one / smlnum
            call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, dum )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_claset( 'F', maxmn, nrhs, czero, czero, b, ldb )
              go to 50
           end if
           brow = m
           if ( tran ) then
             brow = n
           end if
           bnrm = stdlib${ii}$_clange( 'M', brow, nrhs, b, ldb, dum )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if ( m>=n ) then
              ! compute qr factorization of a
             call stdlib${ii}$_cgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
             if ( .not.tran ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
               call stdlib${ii}$_cgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(&
                          1_${ik}$ ), lw2,info )
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
               call stdlib${ii}$_ctrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info )
               if( info>0_${ik}$ ) then
                 return
               end if
               scllen = n
             else
                 ! overdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = czero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_cgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_cgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
              ! workspace at least m, optimally m*nb.
              if( .not.tran ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_cgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_cgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ctrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
             call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
             call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
             call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
             call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( tszo + lwo,KIND=sp)
           return
     end subroutine stdlib${ii}$_cgetsls

     module subroutine stdlib${ii}$_zgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info )
     !! ZGETSLS solves overdetermined or underdetermined complex linear systems
     !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
     !! an undetermined system A**T * X = B.
     !! 4. If TRANS = 'C' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery, tran
           integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, &
                     lw2, wsizeo, wsizem, info2
           real(dp) :: anrm, bignum, bnrm, smlnum, dum(1_${ik}$)
           complex(dp) :: tq(5_${ik}$), workq(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           maxmn = max( m, n )
           tran  = stdlib_lsame( trans, 'C' )
           lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! determine the optimum and minimum lwork
            if( m>=n ) then
              call stdlib${ii}$_zgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_zgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_zgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_zgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            else
              call stdlib${ii}$_zgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_zgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_zgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_zgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm  = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            end if
            if( ( lwork<wsizem ).and.( .not.lquery ) ) then
               info = -10_${ik}$
            end if
            work( 1_${ik}$ ) = real( wsizeo,KIND=dp)
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGETSLS', -info )
             return
           end if
           if( lquery ) then
             if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=dp)
             return
           end if
           if( lwork<wsizeo ) then
             lw1 = tszm
             lw2 = lwm
           else
             lw1 = tszo
             lw2 = lwo
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
                call stdlib${ii}$_zlaset( 'FULL', max( m, n ), nrhs, czero, czero,b, ldb )
                return
           end if
           ! get machine parameters
            smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' )
            bignum = one / smlnum
            call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, dum )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_zlaset( 'F', maxmn, nrhs, czero, czero, b, ldb )
              go to 50
           end if
           brow = m
           if ( tran ) then
             brow = n
           end if
           bnrm = stdlib${ii}$_zlange( 'M', brow, nrhs, b, ldb, dum )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if ( m>=n ) then
              ! compute qr factorization of a
             call stdlib${ii}$_zgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
             if ( .not.tran ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
               call stdlib${ii}$_zgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(&
                          1_${ik}$ ), lw2,info )
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
               call stdlib${ii}$_ztrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info )
               if( info>0_${ik}$ ) then
                 return
               end if
               scllen = n
             else
                 ! overdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = czero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_zgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_zgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
              ! workspace at least m, optimally m*nb.
              if( .not.tran ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_zgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_zgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_ztrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
             call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
             call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
             call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
             call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( tszo + lwo,KIND=dp)
           return
     end subroutine stdlib${ii}$_zgetsls

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$getsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info )
     !! ZGETSLS: solves overdetermined or underdetermined complex linear systems
     !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ
     !! factorization of A.  It is assumed that A has full rank.
     !! The following options are provided:
     !! 1. If TRANS = 'N' and m >= n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A*X ||.
     !! 2. If TRANS = 'N' and m < n:  find the minimum norm solution of
     !! an underdetermined system A * X = B.
     !! 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
     !! an undetermined system A**T * X = B.
     !! 4. If TRANS = 'C' and m < n:  find the least squares solution of
     !! an overdetermined system, i.e., solve the least squares problem
     !! minimize || B - A**T * X ||.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution
     !! matrix X.
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery, tran
           integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, &
                     lw2, wsizeo, wsizem, info2
           real(${ck}$) :: anrm, bignum, bnrm, smlnum, dum(1_${ik}$)
           complex(${ck}$) :: tq(5_${ik}$), workq(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments.
           info = 0_${ik}$
           maxmn = max( m, n )
           tran  = stdlib_lsame( trans, 'C' )
           lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ )
           if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then
              info = -1_${ik}$
           else if( m<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, m, n ) ) then
              info = -8_${ik}$
           end if
           if( info==0_${ik}$ ) then
           ! determine the optimum and minimum lwork
            if( m>=n ) then
              call stdlib${ii}$_${ci}$geqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ci}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_${ci}$geqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ci}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            else
              call stdlib${ii}$_${ci}$gelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 )
              tszo = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwo  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ci}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwo  = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              call stdlib${ii}$_${ci}$gelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 )
              tszm = int( tq( 1_${ik}$ ),KIND=${ik}$)
              lwm  = int( workq( 1_${ik}$ ),KIND=${ik}$)
              call stdlib${ii}$_${ci}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, &
                        info2 )
              lwm  = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) )
              wsizeo = tszo + lwo
              wsizem = tszm + lwm
            end if
            if( ( lwork<wsizem ).and.( .not.lquery ) ) then
               info = -10_${ik}$
            end if
            work( 1_${ik}$ ) = real( wsizeo,KIND=${ck}$)
           end if
           if( info/=0_${ik}$ ) then
             call stdlib${ii}$_xerbla( 'ZGETSLS', -info )
             return
           end if
           if( lquery ) then
             if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=${ck}$)
             return
           end if
           if( lwork<wsizeo ) then
             lw1 = tszm
             lw2 = lwm
           else
             lw1 = tszo
             lw2 = lwo
           end if
           ! quick return if possible
           if( min( m, n, nrhs )==0_${ik}$ ) then
                call stdlib${ii}$_${ci}$laset( 'FULL', max( m, n ), nrhs, czero, czero,b, ldb )
                return
           end if
           ! get machine parameters
            smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
            bignum = one / smlnum
            call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum )
           ! scale a, b if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, dum )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ci}$laset( 'F', maxmn, nrhs, czero, czero, b, ldb )
              go to 50
           end if
           brow = m
           if ( tran ) then
             brow = n
           end if
           bnrm = stdlib${ii}$_${ci}$lange( 'M', brow, nrhs, b, ldb, dum )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info )
              ibscl = 2_${ik}$
           end if
           if ( m>=n ) then
              ! compute qr factorization of a
             call stdlib${ii}$_${ci}$geqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
             if ( .not.tran ) then
                 ! least-squares problem min || a * x - b ||
                 ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs)
               call stdlib${ii}$_${ci}$gemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(&
                          1_${ik}$ ), lw2,info )
                 ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs)
               call stdlib${ii}$_${ci}$trtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info )
               if( info>0_${ik}$ ) then
                 return
               end if
               scllen = n
             else
                 ! overdetermined system of equations a**t * x = b
                 ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(n+1:m,1:nrhs) = czero
                 do j = 1, nrhs
                    do i = n + 1, m
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$gemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 scllen = m
              end if
           else
              ! compute lq factorization of a
              call stdlib${ii}$_${ci}$gelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info )
              ! workspace at least m, optimally m*nb.
              if( .not.tran ) then
                 ! underdetermined system of equations a * x = b
                 ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 ! b(m+1:n,1:nrhs) = 0
                 do j = 1, nrhs
                    do i = m + 1, n
                       b( i, j ) = czero
                    end do
                 end do
                 ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$gemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 scllen = n
              else
                 ! overdetermined system min || a**t * x - b ||
                 ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs)
                 call stdlib${ii}$_${ci}$gemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, &
                           work( 1_${ik}$ ), lw2,info )
                 ! workspace at least nrhs, optimally nrhs*nb
                 ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs)
                 call stdlib${ii}$_${ci}$trtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info )
                 if( info>0_${ik}$ ) then
                    return
                 end if
                 scllen = m
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
             call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info )
           else if( iascl==2_${ik}$ ) then
             call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info )
           end if
           if( ibscl==1_${ik}$ ) then
             call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info )
           else if( ibscl==2_${ik}$ ) then
             call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info )
           end if
           50 continue
           work( 1_${ik}$ ) = real( tszo + lwo,KIND=${ck}$)
           return
     end subroutine stdlib${ii}$_${ci}$getsls

#:endif
#:endfor


#:endfor
end submodule stdlib_lapack_lsq