stdlib_lapack_lsq.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_lsq
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     module subroutine stdlib${ii}$_sgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info )
     !! SGELSS computes the minimum norm solution to a real linear least
     !! squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_sgeqrf, lwork_sormqr, lwork_sgebrd, lwork_sormbr, lwork_sorgbr, &
                     lwork_sormlq
           real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           real(sp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_sgeqrf
                    call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_sgeqrf=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_sormqr
                    call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_sormqr=dum(1_${ik}$)
                    mm = n
                    maxwrk = max( maxwrk, n + lwork_sgeqrf )
                    maxwrk = max( maxwrk, n + lwork_sormqr )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute workspace needed for stdlib${ii}$_sbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*n )
                    ! compute space needed for stdlib${ii}$_sgebrd
                    call stdlib${ii}$_sgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info &
                              )
                    lwork_sgebrd=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_sormbr
                    call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_sormbr=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_sorgbr
                    call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_sorgbr=dum(1_${ik}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sgebrd )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sormbr )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sorgbr )
                    maxwrk = max( maxwrk, bdspac )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac )
                    maxwrk = max( minwrk, maxwrk )
                 end if
                 if( n>m ) then
                    ! compute workspace needed for stdlib${ii}$_sbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*m )
                    minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_sgebrd
                       call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_sgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sormbr
                       call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_sormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sorgbr
                       call stdlib${ii}$_sorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_sorgbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sormlq
                       call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_sormlq=dum(1_${ik}$)
                       ! compute total workspace needed
                       maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sgebrd )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sormbr )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sorgbr )
                       maxwrk = max( maxwrk, m*m + m + bdspac )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_sormlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_sgebrd
                       call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_sgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sormbr
                       call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_sormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_sorgbr
                       call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_sorgbr=dum(1_${ik}$)
                       maxwrk = 3_${ik}$*m + lwork_sgebrd
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sormbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sorgbr )
                       maxwrk = max( maxwrk, bdspac )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_slamch( 'P' )
           sfmin = stdlib${ii}$_slamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
              call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (workspace: need bdspac)
              call stdlib${ii}$_sbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_slacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_sgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_slacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_sgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb)
              call stdlib${ii}$_sorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of l in work(il) and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need m*m+m+bdspac)
              call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,&
                         ldb, work( iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = ie
              ! multiply b by right singular vectors of l in work(il)
              ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_slacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_sgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,&
                               zero, work( iwork ), m )
                    call stdlib${ii}$_slacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_sgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( &
                           iwork ), 1_${ik}$ )
                 call stdlib${ii}$_scopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (workspace: need 4*m, prefer 3*m+m*nb)
              call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need bdspac)
              call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_slacpy( 'F', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_sgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_slacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_sgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_sgelss

     module subroutine stdlib${ii}$_dgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info )
     !! DGELSS computes the minimum norm solution to a real linear least
     !! squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_dgeqrf, lwork_dormqr, lwork_dgebrd, lwork_dormbr, lwork_dorgbr, &
                     lwork_dormlq, lwork_dgelqf
           real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           real(dp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_dgeqrf
                    call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_dgeqrf=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_dormqr
                    call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_dormqr=dum(1_${ik}$)
                    mm = n
                    maxwrk = max( maxwrk, n + lwork_dgeqrf )
                    maxwrk = max( maxwrk, n + lwork_dormqr )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute workspace needed for stdlib${ii}$_dbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*n )
                    ! compute space needed for stdlib${ii}$_dgebrd
                    call stdlib${ii}$_dgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info &
                              )
                    lwork_dgebrd=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_dormbr
                    call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_dormbr=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_dorgbr
                    call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_dorgbr=dum(1_${ik}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dormbr )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr )
                    maxwrk = max( maxwrk, bdspac )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac )
                    maxwrk = max( minwrk, maxwrk )
                 end if
                 if( n>m ) then
                    ! compute workspace needed for stdlib${ii}$_dbdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*m )
                    minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_dgelqf
                       call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_dgelqf=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dgebrd
                       call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_dgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dormbr
                       call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_dormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dorgbr
                       call stdlib${ii}$_dorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_dorgbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dormlq
                       call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_dormlq=dum(1_${ik}$)
                       ! compute total workspace needed
                       maxwrk = m + lwork_dgelqf
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dgebrd )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dormbr )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dorgbr )
                       maxwrk = max( maxwrk, m*m + m + bdspac )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_dormlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_dgebrd
                       call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_dgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dormbr
                       call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_dormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_dorgbr
                       call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_dorgbr=dum(1_${ik}$)
                       maxwrk = 3_${ik}$*m + lwork_dgebrd
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dormbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr )
                       maxwrk = max( maxwrk, bdspac )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_dlamch( 'P' )
           sfmin = stdlib${ii}$_dlamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
              call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (workspace: need bdspac)
              call stdlib${ii}$_dbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_dlacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_dgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_dlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_dgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb)
              call stdlib${ii}$_dorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of l in work(il) and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need m*m+m+bdspac)
              call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,&
                         ldb, work( iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = ie
              ! multiply b by right singular vectors of l in work(il)
              ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_dlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_dgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,&
                               zero, work( iwork ), m )
                    call stdlib${ii}$_dlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_dgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( &
                           iwork ), 1_${ik}$ )
                 call stdlib${ii}$_dcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (workspace: need 4*m, prefer 3*m+m*nb)
              call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need bdspac)
              call stdlib${ii}$_dbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_dlacpy( 'F', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_dgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_dlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_dgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_dgelss

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info )
     !! DGELSS: computes the minimum norm solution to a real linear least
     !! squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${rk}$), intent(in) :: rcond
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: s(*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_qgeqrf, lwork_qormqr, lwork_qgebrd, lwork_qormbr, lwork_qorgbr, &
                     lwork_qormlq, lwork_qgelqf
           real(${rk}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           real(${rk}$) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_${ri}$geqrf
                    call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_qgeqrf=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_${ri}$ormqr
                    call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_qormqr=dum(1_${ik}$)
                    mm = n
                    maxwrk = max( maxwrk, n + lwork_qgeqrf )
                    maxwrk = max( maxwrk, n + lwork_qormqr )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*n )
                    ! compute space needed for stdlib${ii}$_${ri}$gebrd
                    call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info &
                              )
                    lwork_qgebrd=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_${ri}$ormbr
                    call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_qormbr=dum(1_${ik}$)
                    ! compute space needed for stdlib${ii}$_${ri}$orgbr
                    call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_qorgbr=dum(1_${ik}$)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qgebrd )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qormbr )
                    maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr )
                    maxwrk = max( maxwrk, bdspac )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac )
                    maxwrk = max( minwrk, maxwrk )
                 end if
                 if( n>m ) then
                    ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr
                    bdspac = max( 1_${ik}$, 5_${ik}$*m )
                    minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_${ri}$gelqf
                       call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_qgelqf=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$gebrd
                       call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_qgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$ormbr
                       call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_qormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$orgbr
                       call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_qorgbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$ormlq
                       call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_qormlq=dum(1_${ik}$)
                       ! compute total workspace needed
                       maxwrk = m + lwork_qgelqf
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qgebrd )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qormbr )
                       maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qorgbr )
                       maxwrk = max( maxwrk, m*m + m + bdspac )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_qormlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_${ri}$gebrd
                       call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, &
                                 info )
                       lwork_qgebrd=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$ormbr
                       call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_qormbr=dum(1_${ik}$)
                       ! compute space needed for stdlib${ii}$_${ri}$orgbr
                       call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_qorgbr=dum(1_${ik}$)
                       maxwrk = 3_${ik}$*m + lwork_qgebrd
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qormbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr )
                       maxwrk = max( maxwrk, bdspac )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           sfmin = stdlib${ii}$_${ri}$lamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_${ri}$labad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb )
              call stdlib${ii}$_${ri}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (workspace: need 2*n, prefer n+n*nb)
                 call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (workspace: need n+nrhs, prefer n+nrhs*nb)
                 call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda )
              end if
              ie = 1_${ik}$
              itauq = ie + n
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb)
              call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
              call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (workspace: need bdspac)
              call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_${ri}$lacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_${ri}$lacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_${ri}$gemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = &
                        lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (workspace: need 2*m, prefer m+m*nb)
              call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork )
              ie = il + ldwork*m
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb)
              call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb)
              call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of l in work(il) and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need m*m+m+bdspac)
              call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,&
                         ldb, work( iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = ie
              ! multiply b by right singular vectors of l in work(il)
              ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_${ri}$lacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,&
                               zero, work( iwork ), m )
                    call stdlib${ii}$_${ri}$lacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_${ri}$gemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( &
                           iwork ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (workspace: need m+nrhs, prefer m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = ie + m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
              call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( &
                        iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb)
              call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (workspace: need 4*m, prefer 3*m+m*nb)
              call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              iwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (workspace: need bdspac)
              call stdlib${ii}$_${ri}$bdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( &
                        iwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (workspace: need n, prefer n*nrhs)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb )
                           
                 call stdlib${ii}$_${ri}$lacpy( 'F', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,&
                               n )
                    call stdlib${ii}$_${ri}$lacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_${ri}$gemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_${ri}$gelss

#:endif
#:endfor

     module subroutine stdlib${ii}$_cgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! CGELSS computes the minimum norm solution to a complex linear
     !! least squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(sp), intent(in) :: rcond
           ! Array Arguments 
           real(sp), intent(out) :: rwork(*), s(*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_cgeqrf, lwork_cunmqr, lwork_cgebrd, lwork_cunmbr, lwork_cungbr, &
                     lwork_cunmlq, lwork_cgelqf
           real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           complex(sp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! cworkspace refers to complex workspace, and rworkspace refers
             ! to real workspace. nb refers to the optimal block size for the
             ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_cgeqrf
                    call stdlib${ii}$_cgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_cgeqrf = real( dum(1_${ik}$),KIND=sp)
                    ! compute space needed for stdlib${ii}$_cunmqr
                    call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_cunmqr = real( dum(1_${ik}$),KIND=sp)
                    mm = n
                    maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC',m, nrhs, n, -&
                              1_${ik}$ ) )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute space needed for stdlib${ii}$_cgebrd
                    call stdlib${ii}$_cgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                              
                    lwork_cgebrd = real( dum(1_${ik}$),KIND=sp)
                    ! compute space needed for stdlib${ii}$_cunmbr
                    call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_cunmbr = real( dum(1_${ik}$),KIND=sp)
                    ! compute space needed for stdlib${ii}$_cungbr
                    call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_cungbr = real( dum(1_${ik}$),KIND=sp)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cgebrd )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = 2_${ik}$*n + max( nrhs, m )
                 end if
                 if( n>m ) then
                    minwrk = 2_${ik}$*m + max( nrhs, n )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_cgelqf
                       call stdlib${ii}$_cgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_cgelqf = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cgebrd
                       call stdlib${ii}$_cgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_cgebrd = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cunmbr
                       call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_cunmbr = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cungbr
                       call stdlib${ii}$_cungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_cungbr = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cunmlq
                       call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_cunmlq = real( dum(1_${ik}$),KIND=sp)
                       ! compute total workspace needed
                       maxwrk = m + lwork_cgelqf
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cgebrd )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cunmbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cungbr )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_cunmlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_cgebrd
                       call stdlib${ii}$_cgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_cgebrd = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cunmbr
                       call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_cunmbr = real( dum(1_${ik}$),KIND=sp)
                       ! compute space needed for stdlib${ii}$_cungbr
                       call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_cungbr = real( dum(1_${ik}$),KIND=sp)
                       maxwrk = 2_${ik}$*m + lwork_cgebrd
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr )
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_slamch( 'P' )
           sfmin = stdlib${ii}$_slamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (cworkspace: need 2*n, prefer n+n*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (cworkspace: need n+nrhs, prefer n+nrhs*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
              end if
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_cbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_cgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_clacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_cgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then
              ! underdetermined case, m much less than n
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (cworkspace: need 2*m, prefer m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              ie = 1_${ik}$
              itauq = il + ldwork*m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
              ! (rworkspace: need m)
              call stdlib${ii}$_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration, computing right singular
              ! vectors of l in work(il) and multiplying b by transpose of
              ! left singular vectors
              ! (cworkspace: need m*m)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, &
                        b, ldb, rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = il + m*ldwork
              ! multiply b by right singular vectors of l in work(il)
              ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_clacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_cgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), &
                              ldb, czero, work( iwork ), m )
                    call stdlib${ii}$_clacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_cgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(&
                            iwork ), 1_${ik}$ )
                 call stdlib${ii}$_ccopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (cworkspace: need m+nrhs, prefer m+nhrs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (cworkspace: need 3*m, prefer 2*m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_cbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_cgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_clacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_cgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_cgelss

     module subroutine stdlib${ii}$_zgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! ZGELSS computes the minimum norm solution to a complex linear
     !! least squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(dp), intent(in) :: rcond
           ! Array Arguments 
           real(dp), intent(out) :: rwork(*), s(*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_zgeqrf, lwork_zunmqr, lwork_zgebrd, lwork_zunmbr, lwork_zungbr, &
                     lwork_zunmlq, lwork_zgelqf
           real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           complex(dp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! cworkspace refers to complex workspace, and rworkspace refers
             ! to real workspace. nb refers to the optimal block size for the
             ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_zgeqrf
                    call stdlib${ii}$_zgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_zgeqrf = real( dum(1_${ik}$),KIND=dp)
                    ! compute space needed for stdlib${ii}$_zunmqr
                    call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, &
                              info )
                    lwork_zunmqr = real( dum(1_${ik}$),KIND=dp)
                    mm = n
                    maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) )
                              
                    maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC',m, nrhs, n, -&
                              1_${ik}$ ) )
                 end if
                 if( m>=n ) then
                    ! path 1 - overdetermined or exactly determined
                    ! compute space needed for stdlib${ii}$_zgebrd
                    call stdlib${ii}$_zgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                              
                    lwork_zgebrd = real( dum(1_${ik}$),KIND=dp)
                    ! compute space needed for stdlib${ii}$_zunmbr
                    call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),&
                               -1_${ik}$, info )
                    lwork_zunmbr = real( dum(1_${ik}$),KIND=dp)
                    ! compute space needed for stdlib${ii}$_zungbr
                    call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                    lwork_zungbr = real( dum(1_${ik}$),KIND=dp)
                    ! compute total workspace needed
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zgebrd )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr )
                    maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr )
                    maxwrk = max( maxwrk, n*nrhs )
                    minwrk = 2_${ik}$*n + max( nrhs, m )
                 end if
                 if( n>m ) then
                    minwrk = 2_${ik}$*m + max( nrhs, n )
                    if( n>=mnthr ) then
                       ! path 2a - underdetermined, with many more columns
                       ! than rows
                       ! compute space needed for stdlib${ii}$_zgelqf
                       call stdlib${ii}$_zgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info )
                       lwork_zgelqf = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zgebrd
                       call stdlib${ii}$_zgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_zgebrd = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zunmbr
                       call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_zunmbr = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zungbr
                       call stdlib${ii}$_zungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_zungbr = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zunmlq
                       call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -&
                                 1_${ik}$, info )
                       lwork_zunmlq = real( dum(1_${ik}$),KIND=dp)
                       ! compute total workspace needed
                       maxwrk = m + lwork_zgelqf
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zgebrd )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zunmbr )
                       maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zungbr )
                       if( nrhs>1_${ik}$ ) then
                          maxwrk = max( maxwrk, m*m + m + m*nrhs )
                       else
                          maxwrk = max( maxwrk, m*m + 2_${ik}$*m )
                       end if
                       maxwrk = max( maxwrk, m + lwork_zunmlq )
                    else
                       ! path 2 - underdetermined
                       ! compute space needed for stdlib${ii}$_zgebrd
                       call stdlib${ii}$_zgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                                 
                       lwork_zgebrd = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zunmbr
                       call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(&
                                 1_${ik}$), -1_${ik}$, info )
                       lwork_zunmbr = real( dum(1_${ik}$),KIND=dp)
                       ! compute space needed for stdlib${ii}$_zungbr
                       call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info )
                       lwork_zungbr = real( dum(1_${ik}$),KIND=dp)
                       maxwrk = 2_${ik}$*m + lwork_zgebrd
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr )
                       maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr )
                       maxwrk = max( maxwrk, n*nrhs )
                    end if
                 end if
                 maxwrk = max( minwrk, maxwrk )
              end if
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZGELSS', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              rank = 0_${ik}$
              return
           end if
           ! get machine parameters
           eps = stdlib${ii}$_dlamch( 'P' )
           sfmin = stdlib${ii}$_dlamch( 'S' )
           smlnum = sfmin / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork )
           iascl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info )
              iascl = 1_${ik}$
           else if( anrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info )
              iascl = 2_${ik}$
           else if( anrm==zero ) then
              ! matrix all zero. return zero solution.
              call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb )
              call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn )
              rank = 0_${ik}$
              go to 70
           end if
           ! scale b if max element outside range [smlnum,bignum]
           bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork )
           ibscl = 0_${ik}$
           if( bnrm>zero .and. bnrm<smlnum ) then
              ! scale matrix norm up to smlnum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info )
              ibscl = 1_${ik}$
           else if( bnrm>bignum ) then
              ! scale matrix norm down to bignum
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info )
              ibscl = 2_${ik}$
           end if
           ! overdetermined case
           if( m>=n ) then
              ! path 1 - overdetermined or exactly determined
              mm = m
              if( m>=mnthr ) then
                 ! path 1a - overdetermined, with many more rows than columns
                 mm = n
                 itau = 1_${ik}$
                 iwork = itau + n
                 ! compute a=q*r
                 ! (cworkspace: need 2*n, prefer n+n*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                           info )
                 ! multiply b by transpose(q)
                 ! (cworkspace: need n+nrhs, prefer n+nrhs*nb)
                 ! (rworkspace: none)
                 call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( &
                           iwork ), lwork-iwork+1, info )
                 ! zero out below r
                 if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda )
              end if
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + n
              iwork = itaup + n
              ! bidiagonalize r in a
              ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), &
                        work( iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors of r
              ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in a
              ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + n
              ! perform bidiagonal qr iteration
                ! multiply b by transpose of left singular vectors
                ! compute right singular vectors in a
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_zbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, n
                 if( s( i )>thr ) then
                    call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_zgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_zlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_zgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then
              ! underdetermined case, m much less than n
              ! path 2a - underdetermined, with many more columns than rows
              ! and sufficient workspace for an efficient algorithm
              ldwork = m
              if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda
              itau = 1_${ik}$
              iwork = m + 1_${ik}$
              ! compute a=l*q
              ! (cworkspace: need 2*m, prefer m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info )
                        
              il = iwork
              ! copy l to work(il), zeroing out above it
              call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwork )
              call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork )
              ie = 1_${ik}$
              itauq = il + ldwork*m
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize l in work(il)
              ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
              ! (rworkspace: need m)
              call stdlib${ii}$_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( &
                        itaup ), work( iwork ),lwork-iwork+1, info )
              ! multiply b by transpose of left bidiagonalizing vectors of l
              ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, &
                        ldb, work( iwork ),lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors of r in work(il)
              ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), &
                        lwork-iwork+1, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration, computing right singular
              ! vectors of l in work(il) and multiplying b by transpose of
              ! left singular vectors
              ! (cworkspace: need m*m)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, &
                        b, ldb, rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              iwork = il + m*ldwork
              ! multiply b by right singular vectors of l in work(il)
              ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, &
                           work( iwork ), ldb )
                 call stdlib${ii}$_zlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = ( lwork-iwork+1 ) / m
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_zgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), &
                              ldb, czero, work( iwork ), m )
                    call stdlib${ii}$_zlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb )
                 end do
              else
                 call stdlib${ii}$_zgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(&
                            iwork ), 1_${ik}$ )
                 call stdlib${ii}$_zcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              end if
              ! zero out below first m rows of b
              call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb )
              iwork = itau + m
              ! multiply transpose(q) by b
              ! (cworkspace: need m+nrhs, prefer m+nhrs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )&
                        , lwork-iwork+1, info )
           else
              ! path 2 - remaining underdetermined cases
              ie = 1_${ik}$
              itauq = 1_${ik}$
              itaup = itauq + m
              iwork = itaup + m
              ! bidiagonalize a
              ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb)
              ! (rworkspace: need n)
              call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(&
                         iwork ), lwork-iwork+1,info )
              ! multiply b by transpose of left bidiagonalizing vectors
              ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( &
                        iwork ), lwork-iwork+1, info )
              ! generate right bidiagonalizing vectors in a
              ! (cworkspace: need 3*m, prefer 2*m+m*nb)
              ! (rworkspace: none)
              call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+&
                        1_${ik}$, info )
              irwork = ie + m
              ! perform bidiagonal qr iteration,
                 ! computing right singular vectors of a in a and
                 ! multiplying b by transpose of left singular vectors
              ! (cworkspace: none)
              ! (rworkspace: need bdspac)
              call stdlib${ii}$_zbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, &
                        rwork( irwork ), info )
              if( info/=0 )go to 70
              ! multiply b by reciprocals of singular values
              thr = max( rcond*s( 1_${ik}$ ), sfmin )
              if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin )
              rank = 0_${ik}$
              do i = 1, m
                 if( s( i )>thr ) then
                    call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb )
                    rank = rank + 1_${ik}$
                 else
                    call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb )
                 end if
              end do
              ! multiply b by right singular vectors of a
              ! (cworkspace: need n, prefer n*nrhs)
              ! (rworkspace: none)
              if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb )
                           
                 call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb )
              else if( nrhs>1_${ik}$ ) then
                 chunk = lwork / n
                 do i = 1, nrhs, chunk
                    bl = min( nrhs-i+1, chunk )
                    call stdlib${ii}$_zgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, &
                              work, n )
                    call stdlib${ii}$_zlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb )
                 end do
              else
                 call stdlib${ii}$_zgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ )
                 call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ )
              end if
           end if
           ! undo scaling
           if( iascl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info )
           else if( iascl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info )
              call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info )
           end if
           if( ibscl==1_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info )
           else if( ibscl==2_${ik}$ ) then
              call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info )
           end if
           70 continue
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_zgelss

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     module subroutine stdlib${ii}$_${ci}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, &
     !! ZGELSS: computes the minimum norm solution to a complex linear
     !! least squares problem:
     !! Minimize 2-norm(| b - A*x |).
     !! using the singular value decomposition (SVD) of A. A is an M-by-N
     !! matrix which may be rank-deficient.
     !! Several right hand side vectors b and solution vectors x can be
     !! handled in a single call; they are stored as the columns of the
     !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
     !! X.
     !! The effective rank of A is determined by treating as zero those
     !! singular values which are less than RCOND times the largest singular
     !! value.
               info )
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, rank
           integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs
           real(${ck}$), intent(in) :: rcond
           ! Array Arguments 
           real(${ck}$), intent(out) :: rwork(*), s(*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, &
                     ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr
           integer(${ik}$) :: lwork_wgeqrf, lwork_wunmqr, lwork_wgebrd, lwork_wunmbr, lwork_wungbr, &
                     lwork_wunmlq, lwork_wgelqf
           real(${ck}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr
           ! Local Arrays 
           complex(${ck}$) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           maxmn = max( m, n )
           lquery = ( lwork==-1_${ik}$ )
           if( m<0_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -2_${ik}$
           else if( nrhs<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, maxmn ) ) then
              info = -7_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! cworkspace refers to complex workspace, and rworkspace refers
             ! to real workspace. nb refers to the optimal block size for the
             ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( minmn>0_${ik}$ ) then
                 mm = m
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ )
                 if( m>=n .and. m>=mnthr ) then
                    ! path 1a - overdetermined, with many more rows than
                              ! columns
                    ! compute space needed for stdlib${ii}$_${ci}$geqrf
                    call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info )
                    lwork_wgeqrf = real( dum(1_${ik}$),KIND=${ck}$)
                    ! compute space needed for stdlib${ii}$_${ci}$unmqr
                    call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n,