#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_lsq implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) !! SGELSS computes the minimum norm solution to a real linear least !! squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments real(sp), intent(inout) :: a(lda,*), b(ldb,*) real(sp), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_sgeqrf, lwork_sormqr, lwork_sgebrd, lwork_sormbr, lwork_sorgbr, & lwork_sormlq real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_sgeqrf call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_sgeqrf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormqr call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_sormqr=dum(1_${ik}$) mm = n maxwrk = max( maxwrk, n + lwork_sgeqrf ) maxwrk = max( maxwrk, n + lwork_sormqr ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute workspace needed for stdlib${ii}$_sbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*n ) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info & ) lwork_sgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormbr call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_sormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sorgbr call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_sorgbr=dum(1_${ik}$) ! compute total workspace needed maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sgebrd ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sormbr ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac ) maxwrk = max( minwrk, maxwrk ) end if if( n>m ) then ! compute workspace needed for stdlib${ii}$_sbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*m ) minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_sgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormbr call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_sormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sorgbr call stdlib${ii}$_sorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_sorgbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormlq call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_sormlq=dum(1_${ik}$) ! compute total workspace needed maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sgebrd ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sormbr ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sorgbr ) maxwrk = max( maxwrk, m*m + m + bdspac ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_sormlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_sgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormbr call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_sormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sorgbr call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_sorgbr=dum(1_${ik}$) maxwrk = 3_${ik}$*m + lwork_sgebrd maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sormbr ) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_slamch( 'P' ) sfmin = stdlib${ii}$_slamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_slacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_sgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_slacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_sgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of l in work(il) and ! multiplying b by transpose of left singular vectors ! (workspace: need m*m+m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& ldb, work( iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = ie ! multiply b by right singular vectors of l in work(il) ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & work( iwork ), ldb ) call stdlib${ii}$_slacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_sgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,& zero, work( iwork ), m ) call stdlib${ii}$_slacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_sgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( & iwork ), 1_${ik}$ ) call stdlib${ii}$_scopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_slacpy( 'F', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_sgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_slacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_sgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_sgelss module subroutine stdlib${ii}$_dgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) !! DGELSS computes the minimum norm solution to a real linear least !! squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments real(dp), intent(inout) :: a(lda,*), b(ldb,*) real(dp), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_dgeqrf, lwork_dormqr, lwork_dgebrd, lwork_dormbr, lwork_dorgbr, & lwork_dormlq, lwork_dgelqf real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_dgeqrf call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_dgeqrf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormqr call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_dormqr=dum(1_${ik}$) mm = n maxwrk = max( maxwrk, n + lwork_dgeqrf ) maxwrk = max( maxwrk, n + lwork_dormqr ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute workspace needed for stdlib${ii}$_dbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*n ) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info & ) lwork_dgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormbr call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_dormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dorgbr call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_dorgbr=dum(1_${ik}$) ! compute total workspace needed maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dormbr ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac ) maxwrk = max( minwrk, maxwrk ) end if if( n>m ) then ! compute workspace needed for stdlib${ii}$_dbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*m ) minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_dgelqf call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_dgelqf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_dgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormbr call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_dormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dorgbr call stdlib${ii}$_dorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_dorgbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormlq call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_dormlq=dum(1_${ik}$) ! compute total workspace needed maxwrk = m + lwork_dgelqf maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dgebrd ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dormbr ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dorgbr ) maxwrk = max( maxwrk, m*m + m + bdspac ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_dormlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_dgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormbr call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_dormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dorgbr call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_dorgbr=dum(1_${ik}$) maxwrk = 3_${ik}$*m + lwork_dgebrd maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dormbr ) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_dlamch( 'P' ) sfmin = stdlib${ii}$_dlamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_dlacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_dgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_dlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_dgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of l in work(il) and ! multiplying b by transpose of left singular vectors ! (workspace: need m*m+m+bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& ldb, work( iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = ie ! multiply b by right singular vectors of l in work(il) ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & work( iwork ), ldb ) call stdlib${ii}$_dlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_dgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,& zero, work( iwork ), m ) call stdlib${ii}$_dlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_dgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( & iwork ), 1_${ik}$ ) call stdlib${ii}$_dcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_dlacpy( 'F', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_dgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_dlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_dgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_dgelss #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) !! DGELSS: computes the minimum norm solution to a real linear least !! squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${rk}$), intent(in) :: rcond ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*) real(${rk}$), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_qgeqrf, lwork_qormqr, lwork_qgebrd, lwork_qormbr, lwork_qorgbr, & lwork_qormlq, lwork_qgelqf real(${rk}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays real(${rk}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_${ri}$geqrf call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_qgeqrf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormqr call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_qormqr=dum(1_${ik}$) mm = n maxwrk = max( maxwrk, n + lwork_qgeqrf ) maxwrk = max( maxwrk, n + lwork_qormqr ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr bdspac = max( 1_${ik}$, 5_${ik}$*n ) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info & ) lwork_qgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormbr call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_qormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_qorgbr=dum(1_${ik}$) ! compute total workspace needed maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qgebrd ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qormbr ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac ) maxwrk = max( minwrk, maxwrk ) end if if( n>m ) then ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr bdspac = max( 1_${ik}$, 5_${ik}$*m ) minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_${ri}$gelqf call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_qgelqf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_qgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormbr call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_qormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_qorgbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormlq call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_qormlq=dum(1_${ik}$) ! compute total workspace needed maxwrk = m + lwork_qgelqf maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qgebrd ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qormbr ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qorgbr ) maxwrk = max( maxwrk, m*m + m + bdspac ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_qormlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_qgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormbr call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_qormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_qorgbr=dum(1_${ik}$) maxwrk = 3_${ik}$*m + lwork_qgebrd maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qormbr ) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_${ri}$lamch( 'P' ) sfmin = stdlib${ii}$_${ri}$lamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_${ri}$labad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_${ri}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_${ri}$lacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_${ri}$lacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_${ri}$gemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of l in work(il) and ! multiplying b by transpose of left singular vectors ! (workspace: need m*m+m+bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& ldb, work( iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = ie ! multiply b by right singular vectors of l in work(il) ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & work( iwork ), ldb ) call stdlib${ii}$_${ri}$lacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,& zero, work( iwork ), m ) call stdlib${ii}$_${ri}$lacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_${ri}$gemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( & iwork ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_${ri}$lacpy( 'F', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_${ri}$lacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_${ri}$gemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_${ri}$gelss #:endif #:endfor module subroutine stdlib${ii}$_cgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! CGELSS computes the minimum norm solution to a complex linear !! least squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments real(sp), intent(out) :: rwork(*), s(*) complex(sp), intent(inout) :: a(lda,*), b(ldb,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_cgeqrf, lwork_cunmqr, lwork_cgebrd, lwork_cunmbr, lwork_cungbr, & lwork_cunmlq, lwork_cgelqf real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays complex(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace refers ! to real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_cgeqrf call stdlib${ii}$_cgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_cgeqrf = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmqr call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_cunmqr = real( dum(1_${ik}$),KIND=sp) mm = n maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC',m, nrhs, n, -& 1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_cgebrd = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmbr call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_cunmbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cungbr = real( dum(1_${ik}$),KIND=sp) ! compute total workspace needed maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cgebrd ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr ) maxwrk = max( maxwrk, n*nrhs ) minwrk = 2_${ik}$*n + max( nrhs, m ) end if if( n>m ) then minwrk = 2_${ik}$*m + max( nrhs, n ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_cgelqf call stdlib${ii}$_cgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_cgelqf = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cgebrd = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmbr call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_cunmbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cungbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmlq call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_cunmlq = real( dum(1_${ik}$),KIND=sp) ! compute total workspace needed maxwrk = m + lwork_cgelqf maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cgebrd ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cunmbr ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cungbr ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_cunmlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cgebrd = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmbr call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_cunmbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cungbr = real( dum(1_${ik}$),KIND=sp) maxwrk = 2_${ik}$*m + lwork_cgebrd maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_slamch( 'P' ) sfmin = stdlib${ii}$_slamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: none) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: none) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_cgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_clacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_cgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then ! underdetermined case, m much less than n ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: none) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) ie = 1_${ik}$ itauq = il + ldwork*m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) ! (rworkspace: none) call stdlib${ii}$_cungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) irwork = ie + m ! perform bidiagonal qr iteration, computing right singular ! vectors of l in work(il) and multiplying b by transpose of ! left singular vectors ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & b, ldb, rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = il + m*ldwork ! multiply b by right singular vectors of l in work(il) ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & work( iwork ), ldb ) call stdlib${ii}$_clacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_cgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), & ldb, czero, work( iwork ), m ) call stdlib${ii}$_clacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_cgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(& iwork ), 1_${ik}$ ) call stdlib${ii}$_ccopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: none) call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_cgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_clacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_cgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_cgelss module subroutine stdlib${ii}$_zgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! ZGELSS computes the minimum norm solution to a complex linear !! least squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments real(dp), intent(out) :: rwork(*), s(*) complex(dp), intent(inout) :: a(lda,*), b(ldb,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_zgeqrf, lwork_zunmqr, lwork_zgebrd, lwork_zunmbr, lwork_zungbr, & lwork_zunmlq, lwork_zgelqf real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays complex(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace refers ! to real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_zgeqrf call stdlib${ii}$_zgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_zgeqrf = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmqr call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_zunmqr = real( dum(1_${ik}$),KIND=dp) mm = n maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC',m, nrhs, n, -& 1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_zgebrd = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmbr call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_zunmbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zungbr = real( dum(1_${ik}$),KIND=dp) ! compute total workspace needed maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zgebrd ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr ) maxwrk = max( maxwrk, n*nrhs ) minwrk = 2_${ik}$*n + max( nrhs, m ) end if if( n>m ) then minwrk = 2_${ik}$*m + max( nrhs, n ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_zgelqf call stdlib${ii}$_zgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_zgelqf = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zgebrd = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmbr call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_zunmbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zungbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmlq call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_zunmlq = real( dum(1_${ik}$),KIND=dp) ! compute total workspace needed maxwrk = m + lwork_zgelqf maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zgebrd ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zunmbr ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zungbr ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_zunmlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zgebrd = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmbr call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_zunmbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zungbr = real( dum(1_${ik}$),KIND=dp) maxwrk = 2_${ik}$*m + lwork_zgebrd maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_dlamch( 'P' ) sfmin = stdlib${ii}$_dlamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: none) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: none) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_zgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_zlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_zgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then ! underdetermined case, m much less than n ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: none) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) ie = 1_${ik}$ itauq = il + ldwork*m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) ! (rworkspace: none) call stdlib${ii}$_zungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) irwork = ie + m ! perform bidiagonal qr iteration, computing right singular ! vectors of l in work(il) and multiplying b by transpose of ! left singular vectors ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & b, ldb, rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = il + m*ldwork ! multiply b by right singular vectors of l in work(il) ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & work( iwork ), ldb ) call stdlib${ii}$_zlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_zgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), & ldb, czero, work( iwork ), m ) call stdlib${ii}$_zlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_zgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(& iwork ), 1_${ik}$ ) call stdlib${ii}$_zcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: none) call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_zgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_zlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_zgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_zgelss #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! ZGELSS: computes the minimum norm solution to a complex linear !! least squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${ck}$), intent(in) :: rcond ! Array Arguments real(${ck}$), intent(out) :: rwork(*), s(*) complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_wgeqrf, lwork_wunmqr, lwork_wgebrd, lwork_wunmbr, lwork_wungbr, & lwork_wunmlq, lwork_wgelqf real(${ck}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays complex(${ck}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace refers ! to real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_${ci}$geqrf call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_wgeqrf = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$unmqr call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n,