#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_lsq implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) !! SGELSS computes the minimum norm solution to a real linear least !! squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments real(sp), intent(inout) :: a(lda,*), b(ldb,*) real(sp), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_sgeqrf, lwork_sormqr, lwork_sgebrd, lwork_sormbr, lwork_sorgbr, & lwork_sormlq real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_sgeqrf call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_sgeqrf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormqr call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_sormqr=dum(1_${ik}$) mm = n maxwrk = max( maxwrk, n + lwork_sgeqrf ) maxwrk = max( maxwrk, n + lwork_sormqr ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute workspace needed for stdlib${ii}$_sbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*n ) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info & ) lwork_sgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormbr call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_sormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sorgbr call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_sorgbr=dum(1_${ik}$) ! compute total workspace needed maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sgebrd ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sormbr ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_sorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac ) maxwrk = max( minwrk, maxwrk ) end if if( n>m ) then ! compute workspace needed for stdlib${ii}$_sbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*m ) minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_sgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormbr call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_sormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sorgbr call stdlib${ii}$_sorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_sorgbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormlq call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_sormlq=dum(1_${ik}$) ! compute total workspace needed maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sgebrd ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sormbr ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_sorgbr ) maxwrk = max( maxwrk, m*m + m + bdspac ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_sormlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_sgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sormbr call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_sormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_sorgbr call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_sorgbr=dum(1_${ik}$) maxwrk = 3_${ik}$*m + lwork_sgebrd maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sormbr ) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_sorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_slamch( 'P' ) sfmin = stdlib${ii}$_slamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_slacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_sgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_slacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_sgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of l in work(il) and ! multiplying b by transpose of left singular vectors ! (workspace: need m*m+m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& ldb, work( iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = ie ! multiply b by right singular vectors of l in work(il) ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & work( iwork ), ldb ) call stdlib${ii}$_slacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_sgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,& zero, work( iwork ), m ) call stdlib${ii}$_slacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_sgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( & iwork ), 1_${ik}$ ) call stdlib${ii}$_scopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_srscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_slaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_slacpy( 'F', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_sgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_slacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_sgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_scopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_sgelss module subroutine stdlib${ii}$_dgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) !! DGELSS computes the minimum norm solution to a real linear least !! squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments real(dp), intent(inout) :: a(lda,*), b(ldb,*) real(dp), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_dgeqrf, lwork_dormqr, lwork_dgebrd, lwork_dormbr, lwork_dorgbr, & lwork_dormlq, lwork_dgelqf real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_dgeqrf call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_dgeqrf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormqr call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_dormqr=dum(1_${ik}$) mm = n maxwrk = max( maxwrk, n + lwork_dgeqrf ) maxwrk = max( maxwrk, n + lwork_dormqr ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute workspace needed for stdlib${ii}$_dbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*n ) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info & ) lwork_dgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormbr call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_dormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dorgbr call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_dorgbr=dum(1_${ik}$) ! compute total workspace needed maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dormbr ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac ) maxwrk = max( minwrk, maxwrk ) end if if( n>m ) then ! compute workspace needed for stdlib${ii}$_dbdsqr bdspac = max( 1_${ik}$, 5_${ik}$*m ) minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_dgelqf call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_dgelqf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_dgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormbr call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_dormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dorgbr call stdlib${ii}$_dorgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_dorgbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormlq call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_dormlq=dum(1_${ik}$) ! compute total workspace needed maxwrk = m + lwork_dgelqf maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dgebrd ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dormbr ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_dorgbr ) maxwrk = max( maxwrk, m*m + m + bdspac ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_dormlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_dgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dormbr call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_dormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_dorgbr call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_dorgbr=dum(1_${ik}$) maxwrk = 3_${ik}$*m + lwork_dgebrd maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dormbr ) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_dlamch( 'P' ) sfmin = stdlib${ii}$_dlamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_dlacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_dgemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_dlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_dgemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of l in work(il) and ! multiplying b by transpose of left singular vectors ! (workspace: need m*m+m+bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& ldb, work( iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = ie ! multiply b by right singular vectors of l in work(il) ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & work( iwork ), ldb ) call stdlib${ii}$_dlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_dgemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,& zero, work( iwork ), m ) call stdlib${ii}$_dlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_dgemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( & iwork ), 1_${ik}$ ) call stdlib${ii}$_dcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_dlaset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_dlacpy( 'F', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_dgemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_dlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_dgemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_dcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_dgelss #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, info ) !! DGELSS: computes the minimum norm solution to a real linear least !! squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${rk}$), intent(in) :: rcond ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*) real(${rk}$), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bdspac, bl, chunk, i, iascl, ibscl, ie, il, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_qgeqrf, lwork_qormqr, lwork_qgebrd, lwork_qormbr, lwork_qorgbr, & lwork_qormlq, lwork_qgelqf real(${rk}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays real(${rk}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_${ri}$geqrf call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_qgeqrf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormqr call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_qormqr=dum(1_${ik}$) mm = n maxwrk = max( maxwrk, n + lwork_qgeqrf ) maxwrk = max( maxwrk, n + lwork_qormqr ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr bdspac = max( 1_${ik}$, 5_${ik}$*n ) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info & ) lwork_qgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormbr call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_qormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_qorgbr=dum(1_${ik}$) ! compute total workspace needed maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qgebrd ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qormbr ) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, bdspac ) maxwrk = max( minwrk, maxwrk ) end if if( n>m ) then ! compute workspace needed for stdlib${ii}$_${ri}$bdsqr bdspac = max( 1_${ik}$, 5_${ik}$*m ) minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+n, bdspac ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_${ri}$gelqf call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_qgelqf=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_qgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormbr call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_qormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_qorgbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormlq call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_qormlq=dum(1_${ik}$) ! compute total workspace needed maxwrk = m + lwork_qgelqf maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qgebrd ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qormbr ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + lwork_qorgbr ) maxwrk = max( maxwrk, m*m + m + bdspac ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_qormlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & info ) lwork_qgebrd=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$ormbr call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_qormbr=dum(1_${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_qorgbr=dum(1_${ik}$) maxwrk = 3_${ik}$*m + lwork_qgebrd maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qormbr ) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr ) maxwrk = max( maxwrk, bdspac ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_${ri}$lamch( 'P' ) sfmin = stdlib${ii}$_${ri}$lamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_${ri}$labad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_${ri}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, n, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_${ri}$lacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, n, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_${ri}$lacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_${ri}$gemv( 'T', n, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (workspace: need m*m+5*m-1, prefer m*m+4*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of l in work(il) and ! multiplying b by transpose of left singular vectors ! (workspace: need m*m+m+bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, nrhs, s, work( ie ), work( il ),ldwork, a, lda, b,& ldb, work( iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = ie ! multiply b by right singular vectors of l in work(il) ! (workspace: need m*m+2*m, prefer m*m+m+m*nrhs) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, nrhs, m, one, work( il ), ldwork,b, ldb, zero, & work( iwork ), ldb ) call stdlib${ii}$_${ri}$lacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, bl, m, one, work( il ), ldwork,b( 1_${ik}$, i ), ldb,& zero, work( iwork ), m ) call stdlib${ii}$_${ri}$lacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_${ri}$gemv( 'T', m, m, one, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, zero, work( & iwork ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) iwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, n, 0_${ik}$, nrhs, s, work( ie ), a, lda, dum,1_${ik}$, b, ldb, work( & iwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_${ri}$rscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ri}$laset( 'F', 1_${ik}$, nrhs, zero, zero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (workspace: need n, prefer n*nrhs) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, nrhs, m, one, a, lda, b, ldb, zero,work, ldb ) call stdlib${ii}$_${ri}$lacpy( 'F', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', n, bl, m, one, a, lda, b( 1_${ik}$, i ),ldb, zero, work,& n ) call stdlib${ii}$_${ri}$lacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_${ri}$gemv( 'T', m, n, one, a, lda, b, 1_${ik}$, zero, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_${ri}$gelss #:endif #:endfor module subroutine stdlib${ii}$_cgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! CGELSS computes the minimum norm solution to a complex linear !! least squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments real(sp), intent(out) :: rwork(*), s(*) complex(sp), intent(inout) :: a(lda,*), b(ldb,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_cgeqrf, lwork_cunmqr, lwork_cgebrd, lwork_cunmbr, lwork_cungbr, & lwork_cunmlq, lwork_cgelqf real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays complex(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace refers ! to real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_cgeqrf call stdlib${ii}$_cgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_cgeqrf = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmqr call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_cunmqr = real( dum(1_${ik}$),KIND=sp) mm = n maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC',m, nrhs, n, -& 1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_cgebrd = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmbr call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_cunmbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cungbr = real( dum(1_${ik}$),KIND=sp) ! compute total workspace needed maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cgebrd ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr ) maxwrk = max( maxwrk, n*nrhs ) minwrk = 2_${ik}$*n + max( nrhs, m ) end if if( n>m ) then minwrk = 2_${ik}$*m + max( nrhs, n ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_cgelqf call stdlib${ii}$_cgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_cgelqf = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cgebrd = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmbr call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_cunmbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cungbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmlq call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_cunmlq = real( dum(1_${ik}$),KIND=sp) ! compute total workspace needed maxwrk = m + lwork_cgelqf maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cgebrd ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cunmbr ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_cungbr ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_cunmlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cgebrd = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cunmbr call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_cunmbr = real( dum(1_${ik}$),KIND=sp) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_cungbr = real( dum(1_${ik}$),KIND=sp) maxwrk = 2_${ik}$*m + lwork_cgebrd maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_slamch( 'P' ) sfmin = stdlib${ii}$_slamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: none) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: none) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_cgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_clacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_cgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then ! underdetermined case, m much less than n ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: none) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) ie = 1_${ik}$ itauq = il + ldwork*m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) ! (rworkspace: none) call stdlib${ii}$_cungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) irwork = ie + m ! perform bidiagonal qr iteration, computing right singular ! vectors of l in work(il) and multiplying b by transpose of ! left singular vectors ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & b, ldb, rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = il + m*ldwork ! multiply b by right singular vectors of l in work(il) ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & work( iwork ), ldb ) call stdlib${ii}$_clacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_cgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), & ldb, czero, work( iwork ), m ) call stdlib${ii}$_clacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_cgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(& iwork ), 1_${ik}$ ) call stdlib${ii}$_ccopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: none) call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_csrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_claset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_clacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_cgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_clacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_cgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_ccopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_cgelss module subroutine stdlib${ii}$_zgelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! ZGELSS computes the minimum norm solution to a complex linear !! least squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments real(dp), intent(out) :: rwork(*), s(*) complex(dp), intent(inout) :: a(lda,*), b(ldb,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_zgeqrf, lwork_zunmqr, lwork_zgebrd, lwork_zunmbr, lwork_zungbr, & lwork_zunmlq, lwork_zgelqf real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays complex(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace refers ! to real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_zgeqrf call stdlib${ii}$_zgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_zgeqrf = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmqr call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_zunmqr = real( dum(1_${ik}$),KIND=dp) mm = n maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC',m, nrhs, n, -& 1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_zgebrd = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmbr call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_zunmbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zungbr = real( dum(1_${ik}$),KIND=dp) ! compute total workspace needed maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zgebrd ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr ) maxwrk = max( maxwrk, n*nrhs ) minwrk = 2_${ik}$*n + max( nrhs, m ) end if if( n>m ) then minwrk = 2_${ik}$*m + max( nrhs, n ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_zgelqf call stdlib${ii}$_zgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_zgelqf = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zgebrd = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmbr call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_zunmbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zungbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmlq call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_zunmlq = real( dum(1_${ik}$),KIND=dp) ! compute total workspace needed maxwrk = m + lwork_zgelqf maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zgebrd ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zunmbr ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_zungbr ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_zunmlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zgebrd = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zunmbr call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_zunmbr = real( dum(1_${ik}$),KIND=dp) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_zungbr = real( dum(1_${ik}$),KIND=dp) maxwrk = 2_${ik}$*m + lwork_zgebrd maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_dlamch( 'P' ) sfmin = stdlib${ii}$_dlamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: none) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: none) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_zgemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_zlacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_zgemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then ! underdetermined case, m much less than n ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: none) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) ie = 1_${ik}$ itauq = il + ldwork*m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) ! (rworkspace: none) call stdlib${ii}$_zungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) irwork = ie + m ! perform bidiagonal qr iteration, computing right singular ! vectors of l in work(il) and multiplying b by transpose of ! left singular vectors ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & b, ldb, rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = il + m*ldwork ! multiply b by right singular vectors of l in work(il) ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & work( iwork ), ldb ) call stdlib${ii}$_zlacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_zgemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), & ldb, czero, work( iwork ), m ) call stdlib${ii}$_zlacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_zgemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(& iwork ), 1_${ik}$ ) call stdlib${ii}$_zcopy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: none) call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_zdrscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_zlaset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_zlacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_zgemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_zlacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_zgemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_zcopy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_zgelss #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gelss( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! ZGELSS: computes the minimum norm solution to a complex linear !! least squares problem: !! Minimize 2-norm(| b - A*x |). !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix !! X. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${ck}$), intent(in) :: rcond ! Array Arguments real(${ck}$), intent(out) :: rwork(*), s(*) complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: bl, chunk, i, iascl, ibscl, ie, il, irwork, itau, itaup, itauq, iwork, & ldwork, maxmn, maxwrk, minmn, minwrk, mm, mnthr integer(${ik}$) :: lwork_wgeqrf, lwork_wunmqr, lwork_wgebrd, lwork_wunmbr, lwork_wungbr, & lwork_wunmlq, lwork_wgelqf real(${ck}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum, thr ! Local Arrays complex(${ck}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace refers ! to real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( minmn>0_${ik}$ ) then mm = m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSS', ' ', m, n, nrhs, -1_${ik}$ ) if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns ! compute space needed for stdlib${ii}$_${ci}$geqrf call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, info ) lwork_wgeqrf = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$unmqr call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n, a, lda, dum(1_${ik}$), b,ldb, dum(1_${ik}$), -1_${ik}$, & info ) lwork_wunmqr = real( dum(1_${ik}$),KIND=${ck}$) mm = n maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC',m, nrhs, n, -& 1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( mm, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_wgebrd = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$unmbr call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$),& -1_${ik}$, info ) lwork_wunmbr = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_wungbr = real( dum(1_${ik}$),KIND=${ck}$) ! compute total workspace needed maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wgebrd ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr ) maxwrk = max( maxwrk, n*nrhs ) minwrk = 2_${ik}$*n + max( nrhs, m ) end if if( n>m ) then minwrk = 2_${ik}$*m + max( nrhs, n ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows ! compute space needed for stdlib${ii}$_${ci}$gelqf call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$),-1_${ik}$, info ) lwork_wgelqf = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_wgebrd = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$unmbr call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_wunmbr = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_wungbr = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$unmlq call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, nrhs, m, a, lda, dum(1_${ik}$),b, ldb, dum(1_${ik}$), -& 1_${ik}$, info ) lwork_wunmlq = real( dum(1_${ik}$),KIND=${ck}$) ! compute total workspace needed maxwrk = m + lwork_wgelqf maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_wgebrd ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_wunmbr ) maxwrk = max( maxwrk, 3_${ik}$*m + m*m + lwork_wungbr ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + lwork_wunmlq ) else ! path 2 - underdetermined ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_wgebrd = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$unmbr call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, m, a, lda,dum(1_${ik}$), b, ldb, dum(& 1_${ik}$), -1_${ik}$, info ) lwork_wunmbr = real( dum(1_${ik}$),KIND=${ck}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, info ) lwork_wungbr = real( dum(1_${ik}$),KIND=${ck}$) maxwrk = 2_${ik}$*m + lwork_wgebrd maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr ) maxwrk = max( maxwrk, n*nrhs ) end if end if maxwrk = max( minwrk, maxwrk ) end if work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery )info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSS', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) sfmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum ) ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_${c2ri(ci)}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, minmn ) rank = 0_${ik}$ go to 70 end if ! scale b if max element outside range [smlnum,bignum] bnrm = stdlib${ii}$_${ci}$lange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! overdetermined case if( m>=n ) then ! path 1 - overdetermined or exactly determined mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & info ) ! multiply b by transpose(q) ! (cworkspace: need n+nrhs, prefer n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & iwork ), lwork-iwork+1, info ) ! zero out below r if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + n ! perform bidiagonal qr iteration ! multiply b by transpose of left singular vectors ! compute right singular vectors in a ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, n if( s( i )>thr ) then call stdlib${ii}$_${ci}$drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ci}$laset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, nrhs, n, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_${ci}$lacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, bl, n, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_${ci}$lacpy( 'G', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_${ci}$gemv( 'C', n, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_${ci}$copy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if else if( n>=mnthr .and. lwork>=3_${ik}$*m+m*m+max( m, nrhs, n-2*m ) )then ! underdetermined case, m much less than n ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm ldwork = m if( lwork>=3_${ik}$*m+m*lda+max( m, nrhs, n-2*m ) )ldwork = lda itau = 1_${ik}$ iwork = m + 1_${ik}$ ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, info ) il = iwork ! copy l to work(il), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) ie = 1_${ik}$ itauq = il + ldwork*m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(il) ! (cworkspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( iwork ),lwork-iwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l ! (cworkspace: need m*m+3*m+nrhs, prefer m*m+3*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( iwork ),lwork-iwork+1, info ) ! generate right bidiagonalizing vectors of r in work(il) ! (cworkspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( il ), ldwork, work( itaup ),work( iwork ), & lwork-iwork+1, info ) irwork = ie + m ! perform bidiagonal qr iteration, computing right singular ! vectors of l in work(il) and multiplying b by transpose of ! left singular vectors ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, 0_${ik}$, nrhs, s, rwork( ie ), work( il ),ldwork, a, lda, & b, ldb, rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_${ci}$drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ci}$laset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do iwork = il + m*ldwork ! multiply b by right singular vectors of l in work(il) ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs+iwork-1 .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', m, nrhs, m, cone, work( il ), ldwork,b, ldb, czero, & work( iwork ), ldb ) call stdlib${ii}$_${ci}$lacpy( 'G', m, nrhs, work( iwork ), ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = ( lwork-iwork+1 ) / m do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', m, bl, m, cone, work( il ), ldwork,b( 1_${ik}$, i ), & ldb, czero, work( iwork ), m ) call stdlib${ii}$_${ci}$lacpy( 'G', m, bl, work( iwork ), m, b( 1_${ik}$, i ),ldb ) end do else call stdlib${ii}$_${ci}$gemv( 'C', m, m, cone, work( il ), ldwork, b( 1_${ik}$, 1_${ik}$ ),1_${ik}$, czero, work(& iwork ), 1_${ik}$ ) call stdlib${ii}$_${ci}$copy( m, work( iwork ), 1_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) end if ! zero out below first m rows of b call stdlib${ii}$_${ci}$laset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) iwork = itau + m ! multiply transpose(q) by b ! (cworkspace: need m+nrhs, prefer m+nhrs*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( iwork )& , lwork-iwork+1, info ) else ! path 2 - remaining underdetermined cases ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 3*m, prefer 2*m+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& iwork ), lwork-iwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & iwork ), lwork-iwork+1, info ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: none) call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-iwork+& 1_${ik}$, info ) irwork = ie + m ! perform bidiagonal qr iteration, ! computing right singular vectors of a in a and ! multiplying b by transpose of left singular vectors ! (cworkspace: none) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'L', m, n, 0_${ik}$, nrhs, s, rwork( ie ), a, lda, dum,1_${ik}$, b, ldb, & rwork( irwork ), info ) if( info/=0 )go to 70 ! multiply b by reciprocals of singular values thr = max( rcond*s( 1_${ik}$ ), sfmin ) if( rcond<zero )thr = max( eps*s( 1_${ik}$ ), sfmin ) rank = 0_${ik}$ do i = 1, m if( s( i )>thr ) then call stdlib${ii}$_${ci}$drscl( nrhs, s( i ), b( i, 1_${ik}$ ), ldb ) rank = rank + 1_${ik}$ else call stdlib${ii}$_${ci}$laset( 'F', 1_${ik}$, nrhs, czero, czero, b( i, 1_${ik}$ ), ldb ) end if end do ! multiply b by right singular vectors of a ! (cworkspace: need n, prefer n*nrhs) ! (rworkspace: none) if( lwork>=ldb*nrhs .and. nrhs>1_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, nrhs, m, cone, a, lda, b, ldb,czero, work, ldb ) call stdlib${ii}$_${ci}$lacpy( 'G', n, nrhs, work, ldb, b, ldb ) else if( nrhs>1_${ik}$ ) then chunk = lwork / n do i = 1, nrhs, chunk bl = min( nrhs-i+1, chunk ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', n, bl, m, cone, a, lda, b( 1_${ik}$, i ),ldb, czero, & work, n ) call stdlib${ii}$_${ci}$lacpy( 'F', n, bl, work, n, b( 1_${ik}$, i ), ldb ) end do else call stdlib${ii}$_${ci}$gemv( 'C', m, n, cone, a, lda, b, 1_${ik}$, czero, work, 1_${ik}$ ) call stdlib${ii}$_${ci}$copy( n, work, 1_${ik}$, b, 1_${ik}$ ) end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_${ci}$gelss #:endif #:endfor module subroutine stdlib${ii}$_sgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info ) !! SGELSY computes the minimum-norm solution to a real linear least !! squares problem: !! minimize || A * X - B || !! using a complete orthogonal factorization of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The routine first computes a QR factorization with column pivoting: !! A * P = Q * [ R11 R12 ] !! [ 0 R22 ] !! with R11 defined as the largest leading submatrix whose estimated !! condition number is less than 1/RCOND. The order of R11, RANK, !! is the effective rank of A. !! Then, R22 is considered to be negligible, and R12 is annihilated !! by orthogonal transformations from the right, arriving at the !! complete orthogonal factorization: !! A * P = Q * [ T11 0 ] * Z !! [ 0 0 ] !! The minimum-norm solution is then !! X = P * Z**T [ inv(T11)*Q1**T*B ] !! [ 0 ] !! where Q1 consists of the first RANK columns of Q. !! This routine is basically identical to the original xGELSX except !! three differences: !! o The call to the subroutine xGEQPF has been substituted by the !! the call to the subroutine xGEQP3. This subroutine is a Blas-3 !! version of the QR factorization with column pivoting. !! o Matrix B (the right hand side) is updated with Blas-3. !! o The permutation of matrix B (the right hand side) is faster and !! more simple. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(inout) :: jpvt(*) real(sp), intent(inout) :: a(lda,*), b(ldb,*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: imax = 1_${ik}$ integer(${ik}$), parameter :: imin = 2_${ik}$ ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, & nb3, nb4 real(sp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, & wsize ! Intrinsic Functions ! Executable Statements mn = min( m, n ) ismin = mn + 1_${ik}$ ismax = 2_${ik}$*mn + 1_${ik}$ ! test the input arguments. info = 0_${ik}$ lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -7_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ ) then if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then lwkmin = 1_${ik}$ lwkopt = 1_${ik}$ else nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', ' ', m, n, nrhs, -1_${ik}$ ) nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMRQ', ' ', m, n, nrhs, -1_${ik}$ ) nb = max( nb1, nb2, nb3, nb4 ) lwkmin = mn + max( 2_${ik}$*mn, n + 1_${ik}$, mn + nrhs ) lwkopt = max( lwkmin,mn + 2_${ik}$*n + nb*( n + 1_${ik}$ ), 2_${ik}$*mn + nb*nrhs ) end if work( 1_${ik}$ ) = lwkopt if( lwork<lwkmin .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGELSY', -info ) return else if( lquery ) then return end if ! quick return if possible if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a, b if max entries outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) rank = 0_${ik}$ go to 70 end if bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! compute qr factorization with column pivoting of a: ! a * p = q * r call stdlib${ii}$_sgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, info ) wsize = mn + work( mn+1 ) ! workspace: mn+2*n+nb*(n+1). ! details of householder rotations stored in work(1:mn). ! determine rank using incremental condition estimation work( ismin ) = one work( ismax ) = one smax = abs( a( 1_${ik}$, 1_${ik}$ ) ) smin = smax if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then rank = 0_${ik}$ call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) go to 70 else rank = 1_${ik}$ end if 10 continue if( rank<mn ) then i = rank + 1_${ik}$ call stdlib${ii}$_slaic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, & s1, c1 ) call stdlib${ii}$_slaic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, & s2, c2 ) if( smaxpr*rcond<=sminpr ) then do i = 1, rank work( ismin+i-1 ) = s1*work( ismin+i-1 ) work( ismax+i-1 ) = s2*work( ismax+i-1 ) end do work( ismin+rank ) = c1 work( ismax+rank ) = c2 smin = sminpr smax = smaxpr rank = rank + 1_${ik}$ go to 10 end if end if ! workspace: 3*mn. ! logically partition r = [ r11 r12 ] ! [ 0 r22 ] ! where r11 = r(1:rank,1:rank) ! [r11,r12] = [ t11, 0 ] * y if( rank<n )call stdlib${ii}$_stzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-& 2_${ik}$*mn, info ) ! workspace: 2*mn. ! details of householder rotations stored in work(mn+1:2*mn) ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', m, nrhs, mn, a, lda, work( 1_${ik}$ ),b, ldb, work( & 2_${ik}$*mn+1 ), lwork-2*mn, info ) wsize = max( wsize, 2_${ik}$*mn+work( 2_${ik}$*mn+1 ) ) ! workspace: 2*mn+nb*nrhs. ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs) call stdlib${ii}$_strsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, one, a, lda,& b, ldb ) do j = 1, nrhs do i = rank + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := y**t * b(1:n,1:nrhs) if( rank<n ) then call stdlib${ii}$_sormrz( 'LEFT', 'TRANSPOSE', n, nrhs, rank, n-rank, a,lda, work( mn+1 ),& b, ldb, work( 2_${ik}$*mn+1 ),lwork-2*mn, info ) end if ! workspace: 2*mn+nrhs. ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs) do j = 1, nrhs do i = 1, n work( jpvt( i ) ) = b( i, j ) end do call stdlib${ii}$_scopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ ) end do ! workspace: n. ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_sgelsy module subroutine stdlib${ii}$_dgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info ) !! DGELSY computes the minimum-norm solution to a real linear least !! squares problem: !! minimize || A * X - B || !! using a complete orthogonal factorization of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The routine first computes a QR factorization with column pivoting: !! A * P = Q * [ R11 R12 ] !! [ 0 R22 ] !! with R11 defined as the largest leading submatrix whose estimated !! condition number is less than 1/RCOND. The order of R11, RANK, !! is the effective rank of A. !! Then, R22 is considered to be negligible, and R12 is annihilated !! by orthogonal transformations from the right, arriving at the !! complete orthogonal factorization: !! A * P = Q * [ T11 0 ] * Z !! [ 0 0 ] !! The minimum-norm solution is then !! X = P * Z**T [ inv(T11)*Q1**T*B ] !! [ 0 ] !! where Q1 consists of the first RANK columns of Q. !! This routine is basically identical to the original xGELSX except !! three differences: !! o The call to the subroutine xGEQPF has been substituted by the !! the call to the subroutine xGEQP3. This subroutine is a Blas-3 !! version of the QR factorization with column pivoting. !! o Matrix B (the right hand side) is updated with Blas-3. !! o The permutation of matrix B (the right hand side) is faster and !! more simple. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(inout) :: jpvt(*) real(dp), intent(inout) :: a(lda,*), b(ldb,*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: imax = 1_${ik}$ integer(${ik}$), parameter :: imin = 2_${ik}$ ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, & nb3, nb4 real(dp) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, & wsize ! Intrinsic Functions ! Executable Statements mn = min( m, n ) ismin = mn + 1_${ik}$ ismax = 2_${ik}$*mn + 1_${ik}$ ! test the input arguments. info = 0_${ik}$ lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -7_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ ) then if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then lwkmin = 1_${ik}$ lwkopt = 1_${ik}$ else nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', ' ', m, n, nrhs, -1_${ik}$ ) nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', ' ', m, n, nrhs, -1_${ik}$ ) nb = max( nb1, nb2, nb3, nb4 ) lwkmin = mn + max( 2_${ik}$*mn, n + 1_${ik}$, mn + nrhs ) lwkopt = max( lwkmin,mn + 2_${ik}$*n + nb*( n + 1_${ik}$ ), 2_${ik}$*mn + nb*nrhs ) end if work( 1_${ik}$ ) = lwkopt if( lwork<lwkmin .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSY', -info ) return else if( lquery ) then return end if ! quick return if possible if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a, b if max entries outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) rank = 0_${ik}$ go to 70 end if bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! compute qr factorization with column pivoting of a: ! a * p = q * r call stdlib${ii}$_dgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, info ) wsize = mn + work( mn+1 ) ! workspace: mn+2*n+nb*(n+1). ! details of householder rotations stored in work(1:mn). ! determine rank using incremental condition estimation work( ismin ) = one work( ismax ) = one smax = abs( a( 1_${ik}$, 1_${ik}$ ) ) smin = smax if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then rank = 0_${ik}$ call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) go to 70 else rank = 1_${ik}$ end if 10 continue if( rank<mn ) then i = rank + 1_${ik}$ call stdlib${ii}$_dlaic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, & s1, c1 ) call stdlib${ii}$_dlaic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, & s2, c2 ) if( smaxpr*rcond<=sminpr ) then do i = 1, rank work( ismin+i-1 ) = s1*work( ismin+i-1 ) work( ismax+i-1 ) = s2*work( ismax+i-1 ) end do work( ismin+rank ) = c1 work( ismax+rank ) = c2 smin = sminpr smax = smaxpr rank = rank + 1_${ik}$ go to 10 end if end if ! workspace: 3*mn. ! logically partition r = [ r11 r12 ] ! [ 0 r22 ] ! where r11 = r(1:rank,1:rank) ! [r11,r12] = [ t11, 0 ] * y if( rank<n )call stdlib${ii}$_dtzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-& 2_${ik}$*mn, info ) ! workspace: 2*mn. ! details of householder rotations stored in work(mn+1:2*mn) ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', m, nrhs, mn, a, lda, work( 1_${ik}$ ),b, ldb, work( & 2_${ik}$*mn+1 ), lwork-2*mn, info ) wsize = max( wsize, 2_${ik}$*mn+work( 2_${ik}$*mn+1 ) ) ! workspace: 2*mn+nb*nrhs. ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs) call stdlib${ii}$_dtrsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, one, a, lda,& b, ldb ) do j = 1, nrhs do i = rank + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := y**t * b(1:n,1:nrhs) if( rank<n ) then call stdlib${ii}$_dormrz( 'LEFT', 'TRANSPOSE', n, nrhs, rank, n-rank, a,lda, work( mn+1 ),& b, ldb, work( 2_${ik}$*mn+1 ),lwork-2*mn, info ) end if ! workspace: 2*mn+nrhs. ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs) do j = 1, nrhs do i = 1, n work( jpvt( i ) ) = b( i, j ) end do call stdlib${ii}$_dcopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ ) end do ! workspace: n. ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_dgelsy #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, info ) !! DGELSY: computes the minimum-norm solution to a real linear least !! squares problem: !! minimize || A * X - B || !! using a complete orthogonal factorization of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The routine first computes a QR factorization with column pivoting: !! A * P = Q * [ R11 R12 ] !! [ 0 R22 ] !! with R11 defined as the largest leading submatrix whose estimated !! condition number is less than 1/RCOND. The order of R11, RANK, !! is the effective rank of A. !! Then, R22 is considered to be negligible, and R12 is annihilated !! by orthogonal transformations from the right, arriving at the !! complete orthogonal factorization: !! A * P = Q * [ T11 0 ] * Z !! [ 0 0 ] !! The minimum-norm solution is then !! X = P * Z**T [ inv(T11)*Q1**T*B ] !! [ 0 ] !! where Q1 consists of the first RANK columns of Q. !! This routine is basically identical to the original xGELSX except !! three differences: !! o The call to the subroutine xGEQPF has been substituted by the !! the call to the subroutine xGEQP3. This subroutine is a Blas-3 !! version of the QR factorization with column pivoting. !! o Matrix B (the right hand side) is updated with Blas-3. !! o The permutation of matrix B (the right hand side) is faster and !! more simple. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${rk}$), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(inout) :: jpvt(*) real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: imax = 1_${ik}$ integer(${ik}$), parameter :: imin = 2_${ik}$ ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkmin, lwkopt, mn, nb, nb1, nb2, & nb3, nb4 real(${rk}$) :: anrm, bignum, bnrm, c1, c2, s1, s2, smax, smaxpr, smin, sminpr, smlnum, & wsize ! Intrinsic Functions ! Executable Statements mn = min( m, n ) ismin = mn + 1_${ik}$ ismax = 2_${ik}$*mn + 1_${ik}$ ! test the input arguments. info = 0_${ik}$ lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -7_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ ) then if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then lwkmin = 1_${ik}$ lwkopt = 1_${ik}$ else nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', ' ', m, n, nrhs, -1_${ik}$ ) nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMRQ', ' ', m, n, nrhs, -1_${ik}$ ) nb = max( nb1, nb2, nb3, nb4 ) lwkmin = mn + max( 2_${ik}$*mn, n + 1_${ik}$, mn + nrhs ) lwkopt = max( lwkmin,mn + 2_${ik}$*n + nb*( n + 1_${ik}$ ), 2_${ik}$*mn + nb*nrhs ) end if work( 1_${ik}$ ) = lwkopt if( lwork<lwkmin .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSY', -info ) return else if( lquery ) then return end if ! quick return if possible if( mn==0_${ik}$ .or. nrhs==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / stdlib${ii}$_${ri}$lamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_${ri}$labad( smlnum, bignum ) ! scale a, b if max entries outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) rank = 0_${ik}$ go to 70 end if bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! compute qr factorization with column pivoting of a: ! a * p = q * r call stdlib${ii}$_${ri}$geqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, info ) wsize = mn + work( mn+1 ) ! workspace: mn+2*n+nb*(n+1). ! details of householder rotations stored in work(1:mn). ! determine rank using incremental condition estimation work( ismin ) = one work( ismax ) = one smax = abs( a( 1_${ik}$, 1_${ik}$ ) ) smin = smax if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then rank = 0_${ik}$ call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) go to 70 else rank = 1_${ik}$ end if 10 continue if( rank<mn ) then i = rank + 1_${ik}$ call stdlib${ii}$_${ri}$laic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, & s1, c1 ) call stdlib${ii}$_${ri}$laic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, & s2, c2 ) if( smaxpr*rcond<=sminpr ) then do i = 1, rank work( ismin+i-1 ) = s1*work( ismin+i-1 ) work( ismax+i-1 ) = s2*work( ismax+i-1 ) end do work( ismin+rank ) = c1 work( ismax+rank ) = c2 smin = sminpr smax = smaxpr rank = rank + 1_${ik}$ go to 10 end if end if ! workspace: 3*mn. ! logically partition r = [ r11 r12 ] ! [ 0 r22 ] ! where r11 = r(1:rank,1:rank) ! [r11,r12] = [ t11, 0 ] * y if( rank<n )call stdlib${ii}$_${ri}$tzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-& 2_${ik}$*mn, info ) ! workspace: 2*mn. ! details of householder rotations stored in work(mn+1:2*mn) ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', m, nrhs, mn, a, lda, work( 1_${ik}$ ),b, ldb, work( & 2_${ik}$*mn+1 ), lwork-2*mn, info ) wsize = max( wsize, 2_${ik}$*mn+work( 2_${ik}$*mn+1 ) ) ! workspace: 2*mn+nb*nrhs. ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs) call stdlib${ii}$_${ri}$trsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, one, a, lda,& b, ldb ) do j = 1, nrhs do i = rank + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := y**t * b(1:n,1:nrhs) if( rank<n ) then call stdlib${ii}$_${ri}$ormrz( 'LEFT', 'TRANSPOSE', n, nrhs, rank, n-rank, a,lda, work( mn+1 ),& b, ldb, work( 2_${ik}$*mn+1 ),lwork-2*mn, info ) end if ! workspace: 2*mn+nrhs. ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs) do j = 1, nrhs do i = 1, n work( jpvt( i ) ) = b( i, j ) end do call stdlib${ii}$_${ri}$copy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ ) end do ! workspace: n. ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_${ri}$gelsy #:endif #:endfor module subroutine stdlib${ii}$_cgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, & !! CGELSY computes the minimum-norm solution to a complex linear least !! squares problem: !! minimize || A * X - B || !! using a complete orthogonal factorization of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The routine first computes a QR factorization with column pivoting: !! A * P = Q * [ R11 R12 ] !! [ 0 R22 ] !! with R11 defined as the largest leading submatrix whose estimated !! condition number is less than 1/RCOND. The order of R11, RANK, !! is the effective rank of A. !! Then, R22 is considered to be negligible, and R12 is annihilated !! by unitary transformations from the right, arriving at the !! complete orthogonal factorization: !! A * P = Q * [ T11 0 ] * Z !! [ 0 0 ] !! The minimum-norm solution is then !! X = P * Z**H [ inv(T11)*Q1**H*B ] !! [ 0 ] !! where Q1 consists of the first RANK columns of Q. !! This routine is basically identical to the original xGELSX except !! three differences: !! o The permutation of matrix B (the right hand side) is faster and !! more simple. !! o The call to the subroutine xGEQPF has been substituted by the !! the call to the subroutine xGEQP3. This subroutine is a Blas-3 !! version of the QR factorization with column pivoting. !! o Matrix B (the right hand side) is updated with Blas-3. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(inout) :: jpvt(*) real(sp), intent(out) :: rwork(*) complex(sp), intent(inout) :: a(lda,*), b(ldb,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: imax = 1_${ik}$ integer(${ik}$), parameter :: imin = 2_${ik}$ ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, & nb4 real(sp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize complex(sp) :: c1, c2, s1, s2 ! Intrinsic Functions ! Executable Statements mn = min( m, n ) ismin = mn + 1_${ik}$ ismax = 2_${ik}$*mn + 1_${ik}$ ! test the input arguments. info = 0_${ik}$ nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', ' ', m, n, nrhs, -1_${ik}$ ) nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMRQ', ' ', m, n, nrhs, -1_${ik}$ ) nb = max( nb1, nb2, nb3, nb4 ) lwkopt = max( 1_${ik}$, mn+2*n+nb*(n+1), 2_${ik}$*mn+nb*nrhs ) work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -7_${ik}$ else if( lwork<( mn+max( 2_${ik}$*mn, n+1, mn+nrhs ) ) .and..not.lquery ) then info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGELSY', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a, b if max entries outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) rank = 0_${ik}$ go to 70 end if bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! compute qr factorization with column pivoting of a: ! a * p = q * r call stdlib${ii}$_cgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, rwork, info ) wsize = mn + real( work( mn+1 ),KIND=sp) ! complex workspace: mn+nb*(n+1). real workspace 2*n. ! details of householder rotations stored in work(1:mn). ! determine rank using incremental condition estimation work( ismin ) = cone work( ismax ) = cone smax = abs( a( 1_${ik}$, 1_${ik}$ ) ) smin = smax if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then rank = 0_${ik}$ call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) go to 70 else rank = 1_${ik}$ end if 10 continue if( rank<mn ) then i = rank + 1_${ik}$ call stdlib${ii}$_claic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, & s1, c1 ) call stdlib${ii}$_claic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, & s2, c2 ) if( smaxpr*rcond<=sminpr ) then do i = 1, rank work( ismin+i-1 ) = s1*work( ismin+i-1 ) work( ismax+i-1 ) = s2*work( ismax+i-1 ) end do work( ismin+rank ) = c1 work( ismax+rank ) = c2 smin = sminpr smax = smaxpr rank = rank + 1_${ik}$ go to 10 end if end if ! complex workspace: 3*mn. ! logically partition r = [ r11 r12 ] ! [ 0 r22 ] ! where r11 = r(1:rank,1:rank) ! [r11,r12] = [ t11, 0 ] * y if( rank<n )call stdlib${ii}$_ctzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-& 2_${ik}$*mn, info ) ! complex workspace: 2*mn. ! details of householder rotations stored in work(mn+1:2*mn) ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, mn, a, lda,work( 1_${ik}$ ), b, & ldb, work( 2_${ik}$*mn+1 ), lwork-2*mn, info ) wsize = max( wsize, 2_${ik}$*mn+real( work( 2_${ik}$*mn+1 ),KIND=sp) ) ! complex workspace: 2*mn+nb*nrhs. ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs) call stdlib${ii}$_ctrsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, cone, a, & lda, b, ldb ) do j = 1, nrhs do i = rank + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := y**h * b(1:n,1:nrhs) if( rank<n ) then call stdlib${ii}$_cunmrz( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, rank,n-rank, a, lda, & work( mn+1 ), b, ldb,work( 2_${ik}$*mn+1 ), lwork-2*mn, info ) end if ! complex workspace: 2*mn+nrhs. ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs) do j = 1, nrhs do i = 1, n work( jpvt( i ) ) = b( i, j ) end do call stdlib${ii}$_ccopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ ) end do ! complex workspace: n. ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_clascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_clascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = cmplx( lwkopt,KIND=sp) return end subroutine stdlib${ii}$_cgelsy module subroutine stdlib${ii}$_zgelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, & !! ZGELSY computes the minimum-norm solution to a complex linear least !! squares problem: !! minimize || A * X - B || !! using a complete orthogonal factorization of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The routine first computes a QR factorization with column pivoting: !! A * P = Q * [ R11 R12 ] !! [ 0 R22 ] !! with R11 defined as the largest leading submatrix whose estimated !! condition number is less than 1/RCOND. The order of R11, RANK, !! is the effective rank of A. !! Then, R22 is considered to be negligible, and R12 is annihilated !! by unitary transformations from the right, arriving at the !! complete orthogonal factorization: !! A * P = Q * [ T11 0 ] * Z !! [ 0 0 ] !! The minimum-norm solution is then !! X = P * Z**H [ inv(T11)*Q1**H*B ] !! [ 0 ] !! where Q1 consists of the first RANK columns of Q. !! This routine is basically identical to the original xGELSX except !! three differences: !! o The permutation of matrix B (the right hand side) is faster and !! more simple. !! o The call to the subroutine xGEQPF has been substituted by the !! the call to the subroutine xGEQP3. This subroutine is a Blas-3 !! version of the QR factorization with column pivoting. !! o Matrix B (the right hand side) is updated with Blas-3. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(inout) :: jpvt(*) real(dp), intent(out) :: rwork(*) complex(dp), intent(inout) :: a(lda,*), b(ldb,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: imax = 1_${ik}$ integer(${ik}$), parameter :: imin = 2_${ik}$ ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, & nb4 real(dp) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize complex(dp) :: c1, c2, s1, s2 ! Intrinsic Functions ! Executable Statements mn = min( m, n ) ismin = mn + 1_${ik}$ ismax = 2_${ik}$*mn + 1_${ik}$ ! test the input arguments. info = 0_${ik}$ nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', ' ', m, n, nrhs, -1_${ik}$ ) nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', ' ', m, n, nrhs, -1_${ik}$ ) nb = max( nb1, nb2, nb3, nb4 ) lwkopt = max( 1_${ik}$, mn+2*n+nb*( n+1 ), 2_${ik}$*mn+nb*nrhs ) work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -7_${ik}$ else if( lwork<( mn+max( 2_${ik}$*mn, n+1, mn+nrhs ) ) .and. .not.lquery ) then info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSY', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a, b if max entries outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) rank = 0_${ik}$ go to 70 end if bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! compute qr factorization with column pivoting of a: ! a * p = q * r call stdlib${ii}$_zgeqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, rwork, info ) wsize = mn + real( work( mn+1 ),KIND=dp) ! complex workspace: mn+nb*(n+1). real workspace 2*n. ! details of householder rotations stored in work(1:mn). ! determine rank using incremental condition estimation work( ismin ) = cone work( ismax ) = cone smax = abs( a( 1_${ik}$, 1_${ik}$ ) ) smin = smax if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then rank = 0_${ik}$ call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) go to 70 else rank = 1_${ik}$ end if 10 continue if( rank<mn ) then i = rank + 1_${ik}$ call stdlib${ii}$_zlaic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, & s1, c1 ) call stdlib${ii}$_zlaic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, & s2, c2 ) if( smaxpr*rcond<=sminpr ) then do i = 1, rank work( ismin+i-1 ) = s1*work( ismin+i-1 ) work( ismax+i-1 ) = s2*work( ismax+i-1 ) end do work( ismin+rank ) = c1 work( ismax+rank ) = c2 smin = sminpr smax = smaxpr rank = rank + 1_${ik}$ go to 10 end if end if ! complex workspace: 3*mn. ! logically partition r = [ r11 r12 ] ! [ 0 r22 ] ! where r11 = r(1:rank,1:rank) ! [r11,r12] = [ t11, 0 ] * y if( rank<n )call stdlib${ii}$_ztzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-& 2_${ik}$*mn, info ) ! complex workspace: 2*mn. ! details of householder rotations stored in work(mn+1:2*mn) ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, mn, a, lda,work( 1_${ik}$ ), b, & ldb, work( 2_${ik}$*mn+1 ), lwork-2*mn, info ) wsize = max( wsize, 2_${ik}$*mn+real( work( 2_${ik}$*mn+1 ),KIND=dp) ) ! complex workspace: 2*mn+nb*nrhs. ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs) call stdlib${ii}$_ztrsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, cone, a, & lda, b, ldb ) do j = 1, nrhs do i = rank + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := y**h * b(1:n,1:nrhs) if( rank<n ) then call stdlib${ii}$_zunmrz( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, rank,n-rank, a, lda, & work( mn+1 ), b, ldb,work( 2_${ik}$*mn+1 ), lwork-2*mn, info ) end if ! complex workspace: 2*mn+nrhs. ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs) do j = 1, nrhs do i = 1, n work( jpvt( i ) ) = b( i, j ) end do call stdlib${ii}$_zcopy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ ) end do ! complex workspace: n. ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_zlascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_zlascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = cmplx( lwkopt,KIND=dp) return end subroutine stdlib${ii}$_zgelsy #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gelsy( m, n, nrhs, a, lda, b, ldb, jpvt, rcond, rank,work, lwork, rwork, & !! ZGELSY: computes the minimum-norm solution to a complex linear least !! squares problem: !! minimize || A * X - B || !! using a complete orthogonal factorization of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The routine first computes a QR factorization with column pivoting: !! A * P = Q * [ R11 R12 ] !! [ 0 R22 ] !! with R11 defined as the largest leading submatrix whose estimated !! condition number is less than 1/RCOND. The order of R11, RANK, !! is the effective rank of A. !! Then, R22 is considered to be negligible, and R12 is annihilated !! by unitary transformations from the right, arriving at the !! complete orthogonal factorization: !! A * P = Q * [ T11 0 ] * Z !! [ 0 0 ] !! The minimum-norm solution is then !! X = P * Z**H [ inv(T11)*Q1**H*B ] !! [ 0 ] !! where Q1 consists of the first RANK columns of Q. !! This routine is basically identical to the original xGELSX except !! three differences: !! o The permutation of matrix B (the right hand side) is faster and !! more simple. !! o The call to the subroutine xGEQPF has been substituted by the !! the call to the subroutine xGEQP3. This subroutine is a Blas-3 !! version of the QR factorization with column pivoting. !! o Matrix B (the right hand side) is updated with Blas-3. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${ck}$), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(inout) :: jpvt(*) real(${ck}$), intent(out) :: rwork(*) complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: imax = 1_${ik}$ integer(${ik}$), parameter :: imin = 2_${ik}$ ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iascl, ibscl, ismax, ismin, j, lwkopt, mn, nb, nb1, nb2, nb3, & nb4 real(${ck}$) :: anrm, bignum, bnrm, smax, smaxpr, smin, sminpr, smlnum, wsize complex(${ck}$) :: c1, c2, s1, s2 ! Intrinsic Functions ! Executable Statements mn = min( m, n ) ismin = mn + 1_${ik}$ ismax = 2_${ik}$*mn + 1_${ik}$ ! test the input arguments. info = 0_${ik}$ nb1 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb2 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGERQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) nb3 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', ' ', m, n, nrhs, -1_${ik}$ ) nb4 = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMRQ', ' ', m, n, nrhs, -1_${ik}$ ) nb = max( nb1, nb2, nb3, nb4 ) lwkopt = max( 1_${ik}$, mn+2*n+nb*( n+1 ), 2_${ik}$*mn+nb*nrhs ) work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -7_${ik}$ else if( lwork<( mn+max( 2_${ik}$*mn, n+1, mn+nrhs ) ) .and. .not.lquery ) then info = -12_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSY', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum ) ! scale a, b if max entries outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) rank = 0_${ik}$ go to 70 end if bnrm = stdlib${ii}$_${ci}$lange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! compute qr factorization with column pivoting of a: ! a * p = q * r call stdlib${ii}$_${ci}$geqp3( m, n, a, lda, jpvt, work( 1_${ik}$ ), work( mn+1 ),lwork-mn, rwork, info ) wsize = mn + real( work( mn+1 ),KIND=${ck}$) ! complex workspace: mn+nb*(n+1). real workspace 2*n. ! details of householder rotations stored in work(1:mn). ! determine rank using incremental condition estimation work( ismin ) = cone work( ismax ) = cone smax = abs( a( 1_${ik}$, 1_${ik}$ ) ) smin = smax if( abs( a( 1_${ik}$, 1_${ik}$ ) )==zero ) then rank = 0_${ik}$ call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) go to 70 else rank = 1_${ik}$ end if 10 continue if( rank<mn ) then i = rank + 1_${ik}$ call stdlib${ii}$_${ci}$laic1( imin, rank, work( ismin ), smin, a( 1_${ik}$, i ),a( i, i ), sminpr, & s1, c1 ) call stdlib${ii}$_${ci}$laic1( imax, rank, work( ismax ), smax, a( 1_${ik}$, i ),a( i, i ), smaxpr, & s2, c2 ) if( smaxpr*rcond<=sminpr ) then do i = 1, rank work( ismin+i-1 ) = s1*work( ismin+i-1 ) work( ismax+i-1 ) = s2*work( ismax+i-1 ) end do work( ismin+rank ) = c1 work( ismax+rank ) = c2 smin = sminpr smax = smaxpr rank = rank + 1_${ik}$ go to 10 end if end if ! complex workspace: 3*mn. ! logically partition r = [ r11 r12 ] ! [ 0 r22 ] ! where r11 = r(1:rank,1:rank) ! [r11,r12] = [ t11, 0 ] * y if( rank<n )call stdlib${ii}$_${ci}$tzrzf( rank, n, a, lda, work( mn+1 ), work( 2_${ik}$*mn+1 ),lwork-& 2_${ik}$*mn, info ) ! complex workspace: 2*mn. ! details of householder rotations stored in work(mn+1:2*mn) ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, mn, a, lda,work( 1_${ik}$ ), b, & ldb, work( 2_${ik}$*mn+1 ), lwork-2*mn, info ) wsize = max( wsize, 2_${ik}$*mn+real( work( 2_${ik}$*mn+1 ),KIND=${ck}$) ) ! complex workspace: 2*mn+nb*nrhs. ! b(1:rank,1:nrhs) := inv(t11) * b(1:rank,1:nrhs) call stdlib${ii}$_${ci}$trsm( 'LEFT', 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', rank,nrhs, cone, a, & lda, b, ldb ) do j = 1, nrhs do i = rank + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := y**h * b(1:n,1:nrhs) if( rank<n ) then call stdlib${ii}$_${ci}$unmrz( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, rank,n-rank, a, lda, & work( mn+1 ), b, ldb,work( 2_${ik}$*mn+1 ), lwork-2*mn, info ) end if ! complex workspace: 2*mn+nrhs. ! b(1:n,1:nrhs) := p * b(1:n,1:nrhs) do j = 1, nrhs do i = 1, n work( jpvt( i ) ) = b( i, j ) end do call stdlib${ii}$_${ci}$copy( n, work( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, j ), 1_${ik}$ ) end do ! complex workspace: n. ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ci}$lascl( 'U', 0_${ik}$, 0_${ik}$, smlnum, anrm, rank, rank, a, lda,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ci}$lascl( 'U', 0_${ik}$, 0_${ik}$, bignum, anrm, rank, rank, a, lda,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 70 continue work( 1_${ik}$ ) = cmplx( lwkopt,KIND=${ck}$) return end subroutine stdlib${ii}$_${ci}$gelsy #:endif #:endfor module subroutine stdlib${ii}$_sgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) !! SGELS solves overdetermined or underdetermined real linear systems !! involving an M-by-N matrix A, or its transpose, using a QR or LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'T' and m >= n: find the minimum norm solution of !! an underdetermined system A**T * X = B. !! 4. If TRANS = 'T' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments real(sp), intent(inout) :: a(lda,*), b(ldb,*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tpsd integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize real(sp) :: anrm, bignum, bnrm, smlnum ! Local Arrays real(sp) :: rwork(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ else if( lwork<max( 1_${ik}$, mn + max( mn, nrhs ) ) .and..not.lquery ) then info = -10_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ .or. info==-10_${ik}$ ) then tpsd = .true. if( stdlib_lsame( trans, 'N' ) )tpsd = .false. if( m>=n ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', 'LN', m, nrhs, n,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', 'LT', m, nrhs, n,-1_${ik}$ ) ) end if else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', 'LT', n, nrhs, m,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) ) end if end if wsize = max( 1_${ik}$, mn + max( mn, nrhs )*nb ) work( 1_${ik}$ ) = real( wsize,KIND=sp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGELS ', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_slaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) go to 50 end if brow = m if( tpsd )brow = n bnrm = stdlib${ii}$_slange( 'M', brow, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_sgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least n, optimally n*nb if( .not.tpsd ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_sormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb, & work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_strtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = n else ! underdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_strtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = zero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_sormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_sgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least m, optimally m*nb. if( .not.tpsd ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_strtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_sormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb, & work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_sormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_strtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( wsize,KIND=sp) return end subroutine stdlib${ii}$_sgels module subroutine stdlib${ii}$_dgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) !! DGELS solves overdetermined or underdetermined real linear systems !! involving an M-by-N matrix A, or its transpose, using a QR or LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'T' and m >= n: find the minimum norm solution of !! an underdetermined system A**T * X = B. !! 4. If TRANS = 'T' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments real(dp), intent(inout) :: a(lda,*), b(ldb,*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tpsd integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize real(dp) :: anrm, bignum, bnrm, smlnum ! Local Arrays real(dp) :: rwork(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then info = -10_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ .or. info==-10_${ik}$ ) then tpsd = .true. if( stdlib_lsame( trans, 'N' ) )tpsd = .false. if( m>=n ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LN', m, nrhs, n,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n,-1_${ik}$ ) ) end if else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) ) end if end if wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb ) work( 1_${ik}$ ) = real( wsize,KIND=dp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELS ', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_dlaset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) go to 50 end if brow = m if( tpsd )brow = n bnrm = stdlib${ii}$_dlange( 'M', brow, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_dgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least n, optimally n*nb if( .not.tpsd ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_dormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb, & work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_dtrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = n else ! underdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_dtrtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = zero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_dormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_dgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least m, optimally m*nb. if( .not.tpsd ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_dtrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_dormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb, & work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_dormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_dtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( wsize,KIND=dp) return end subroutine stdlib${ii}$_dgels #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) !! DGELS: solves overdetermined or underdetermined real linear systems !! involving an M-by-N matrix A, or its transpose, using a QR or LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'T' and m >= n: find the minimum norm solution of !! an underdetermined system A**T * X = B. !! 4. If TRANS = 'T' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tpsd integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize real(${rk}$) :: anrm, bignum, bnrm, smlnum ! Local Arrays real(${rk}$) :: rwork(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'T' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then info = -10_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ .or. info==-10_${ik}$ ) then tpsd = .true. if( stdlib_lsame( trans, 'N' ) )tpsd = .false. if( m>=n ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LN', m, nrhs, n,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n,-1_${ik}$ ) ) end if else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) ) end if end if wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb ) work( 1_${ik}$ ) = real( wsize,KIND=${rk}$) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELS ', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'FULL', max( m, n ), nrhs, zero, zero, b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / stdlib${ii}$_${ri}$lamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_${ri}$labad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) go to 50 end if brow = m if( tpsd )brow = n bnrm = stdlib${ii}$_${ri}$lange( 'M', brow, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least n, optimally n*nb if( .not.tpsd ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb, & work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = n else ! underdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'UPPER', 'TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = zero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least m, optimally m*nb. if( .not.tpsd ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$ormlq( 'LEFT', 'TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb, & work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$ormlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( wsize,KIND=${rk}$) return end subroutine stdlib${ii}$_${ri}$gels #:endif #:endfor module subroutine stdlib${ii}$_cgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) !! CGELS solves overdetermined or underdetermined complex linear systems !! involving an M-by-N matrix A, or its conjugate-transpose, using a QR !! or LQ factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'C' and m >= n: find the minimum norm solution of !! an underdetermined system A**H * X = B. !! 4. If TRANS = 'C' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**H * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments complex(sp), intent(inout) :: a(lda,*), b(ldb,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tpsd integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize real(sp) :: anrm, bignum, bnrm, smlnum ! Local Arrays real(sp) :: rwork(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and..not.lquery ) then info = -10_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ .or. info==-10_${ik}$ ) then tpsd = .true. if( stdlib_lsame( trans, 'N' ) )tpsd = .false. if( m>=n ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LN', m, nrhs, n,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC', m, nrhs, n,-1_${ik}$ ) ) end if else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', 'LC', n, nrhs, m,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) ) end if end if wsize = max( 1_${ik}$, mn + max( mn, nrhs )*nb ) work( 1_${ik}$ ) = real( wsize,KIND=sp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGELS ', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_claset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) go to 50 end if brow = m if( tpsd )brow = n bnrm = stdlib${ii}$_clange( 'M', brow, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_cgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least n, optimally n*nb if( .not.tpsd ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) call stdlib${ii}$_cunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1_${ik}$ ), & b, ldb, work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_ctrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = n else ! underdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs) call stdlib${ii}$_ctrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,& ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = czero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_cunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_cgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least m, optimally m*nb. if( .not.tpsd ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_ctrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs) call stdlib${ii}$_cunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1_${ik}$ ), & b, ldb, work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**h * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_cunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs) call stdlib${ii}$_ctrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, & b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( wsize,KIND=sp) return end subroutine stdlib${ii}$_cgels module subroutine stdlib${ii}$_zgels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) !! ZGELS solves overdetermined or underdetermined complex linear systems !! involving an M-by-N matrix A, or its conjugate-transpose, using a QR !! or LQ factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'C' and m >= n: find the minimum norm solution of !! an underdetermined system A**H * X = B. !! 4. If TRANS = 'C' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**H * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments complex(dp), intent(inout) :: a(lda,*), b(ldb,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tpsd integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize real(dp) :: anrm, bignum, bnrm, smlnum ! Local Arrays real(dp) :: rwork(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then info = -10_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ .or. info==-10_${ik}$ ) then tpsd = .true. if( stdlib_lsame( trans, 'N' ) )tpsd = .false. if( m>=n ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LN', m, nrhs, n,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m, nrhs, n,-1_${ik}$ ) ) end if else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LC', n, nrhs, m,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) ) end if end if wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb ) work( 1_${ik}$ ) = real( wsize,KIND=dp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELS ', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_zlaset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) go to 50 end if brow = m if( tpsd )brow = n bnrm = stdlib${ii}$_zlange( 'M', brow, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_zgeqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least n, optimally n*nb if( .not.tpsd ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) call stdlib${ii}$_zunmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1_${ik}$ ), & b, ldb, work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_ztrtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = n else ! underdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs) call stdlib${ii}$_ztrtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,& ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = czero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_zunmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_zgelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least m, optimally m*nb. if( .not.tpsd ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_ztrtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs) call stdlib${ii}$_zunmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1_${ik}$ ), & b, ldb, work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**h * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_zunmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs) call stdlib${ii}$_ztrtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, & b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( wsize,KIND=dp) return end subroutine stdlib${ii}$_zgels #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gels( trans, m, n, nrhs, a, lda, b, ldb, work, lwork,info ) !! ZGELS: solves overdetermined or underdetermined complex linear systems !! involving an M-by-N matrix A, or its conjugate-transpose, using a QR !! or LQ factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'C' and m >= n: find the minimum norm solution of !! an underdetermined system A**H * X = B. !! 4. If TRANS = 'C' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**H * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tpsd integer(${ik}$) :: brow, i, iascl, ibscl, j, mn, nb, scllen, wsize real(${ck}$) :: anrm, bignum, bnrm, smlnum ! Local Arrays real(${ck}$) :: rwork(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or. stdlib_lsame( trans, 'C' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ else if( lwork<max( 1_${ik}$, mn+max( mn, nrhs ) ) .and. .not.lquery )then info = -10_${ik}$ end if ! figure out optimal block size if( info==0_${ik}$ .or. info==-10_${ik}$ ) then tpsd = .true. if( stdlib_lsame( trans, 'N' ) )tpsd = .false. if( m>=n ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LN', m, nrhs, n,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m, nrhs, n,-1_${ik}$ ) ) end if else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( tpsd ) then nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LC', n, nrhs, m,-1_${ik}$ ) ) else nb = max( nb, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', 'LN', n, nrhs, m,-1_${ik}$ ) ) end if end if wsize = max( 1_${ik}$, mn+max( mn, nrhs )*nb ) work( 1_${ik}$ ) = real( wsize,KIND=${ck}$) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELS ', -info ) return else if( lquery ) then return end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'FULL', max( m, n ), nrhs, czero, czero, b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) go to 50 end if brow = m if( tpsd )brow = n bnrm = stdlib${ii}$_${ci}$lange( 'M', brow, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least n, optimally n*nb if( .not.tpsd ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**h * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'CONJUGATE TRANSPOSE', m, nrhs, n, a,lda, work( 1_${ik}$ ), & b, ldb, work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'UPPER', 'NO TRANSPOSE', 'NON-UNIT', n, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = n else ! underdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**h) * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'UPPER', 'CONJUGATE TRANSPOSE','NON-UNIT',n, nrhs, a, lda, b,& ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = czero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$unmqr( 'LEFT', 'NO TRANSPOSE', m, nrhs, n, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( 1_${ik}$ ), work( mn+1 ), lwork-mn,info ) ! workspace at least m, optimally m*nb. if( .not.tpsd ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'LOWER', 'NO TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**h * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$unmlq( 'LEFT', 'CONJUGATE TRANSPOSE', n, nrhs, m, a,lda, work( 1_${ik}$ ), & b, ldb, work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**h * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$unmlq( 'LEFT', 'NO TRANSPOSE', n, nrhs, m, a, lda,work( 1_${ik}$ ), b, ldb,& work( mn+1 ), lwork-mn,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**h) * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'LOWER', 'CONJUGATE TRANSPOSE', 'NON-UNIT',m, nrhs, a, lda, & b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( wsize,KIND=${ck}$) return end subroutine stdlib${ii}$_${ci}$gels #:endif #:endfor module subroutine stdlib${ii}$_sgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond,rank, work, lwork, iwork, & !! SGELSD computes the minimum-norm solution to a real linear least !! squares problem: !! minimize 2-norm(| b - A*x |) !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The problem is solved in three steps: !! (1) Reduce the coefficient matrix A to bidiagonal form with !! Householder transformations, reducing the original problem !! into a "bidiagonal least squares problem" (BLS) !! (2) Solve the BLS using a divide and conquer approach. !! (3) Apply back all the Householder transformations to solve !! the original least squares problem. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(inout) :: a(lda,*), b(ldb,*) real(sp), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, & maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace. ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ liwork = 1_${ik}$ if( minmn>0_${ik}$ ) then smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'SGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ ) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGELSD', ' ', m, n, nrhs, -1_${ik}$ ) nlvl = max( int( log( real( minmn,KIND=sp) / real( smlsiz + 1_${ik}$,KIND=sp) ) /log( & two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ ) liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn mm = m if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns. mm = n maxwrk = max( maxwrk, n + n*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEQRF', ' ', m,n, -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', 'LT',m, nrhs, n, -& 1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined. maxwrk = max( maxwrk, 3_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'SGEBRD', ' ', mm, n, & -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMBR','QLT', mm, nrhs, & n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'SORMBR', 'PLN', n, & nrhs, n, -1_${ik}$ ) ) wlalsd = 9_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + n*nrhs +( smlsiz + 1_${ik}$ )**2_${ik}$ maxwrk = max( maxwrk, 3_${ik}$*n + wlalsd ) minwrk = max( 3_${ik}$*n + mm, 3_${ik}$*n + nrhs, 3_${ik}$*n + wlalsd ) end if if( n>m ) then wlalsd = 9_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + m*nrhs +( smlsiz + 1_${ik}$ )**2_${ik}$ if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows. maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'SGEBRD', ' ', m, m,& -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'SORMBR', 'QLT', m,& nrhs, m, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'SORMBR', & 'PLN', m, nrhs, m, -1_${ik}$ ) ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ','LT', n, nrhs, m,& -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + wlalsd ) ! xxx: ensure the path 2a case below is triggered. the workspace ! calculation should use queries for all routines eventually. maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) else ! path 2 - remaining underdetermined cases. maxwrk = 3_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, 3_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMBR','QLT', m, nrhs,& n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMBR','PLN', n, nrhs, m,& -1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*m + wlalsd ) end if minwrk = max( 3_${ik}$*m + nrhs, 3_${ik}$*m + m, 3_${ik}$*m + wlalsd ) end if end if minwrk = min( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGELSD', -info ) return else if( lquery ) then return end if ! quick return if possible. if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters. eps = stdlib${ii}$_slamch( 'P' ) sfmin = stdlib${ii}$_slamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a if max entry outside range [smlnum,bignum]. anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_slaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ ) rank = 0_${ik}$ go to 10 end if ! scale b if max entry outside range [smlnum,bignum]. bnrm = stdlib${ii}$_slange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! if m < n make sure certain entries of b are zero. if( m<n )call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) ! overdetermined case. if( m>=n ) then ! path 1 - overdetermined or exactly determined. mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns. mm = n itau = 1_${ik}$ nwork = itau + n ! compute a=q*r. ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & info ) ! multiply b by transpose(q). ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_sormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & nwork ), lwork-nwork+1, info ) ! zero out below r. if( n>1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a. ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_sgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r. ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_slalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of r. call stdlib${ii}$_sormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m, wlalsd ) ) & then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm. ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4_${ik}$*m+m*lda+& wlalsd ) )ldwork = lda itau = 1_${ik}$ nwork = m + 1_${ik}$ ! compute a=l*q. ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) il = nwork ! copy l to work(il), zeroing out above its diagonal. call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il). ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( nwork ),lwork-nwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l. ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_slalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of l. call stdlib${ii}$_sormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! zero out below first m rows of b. call stdlib${ii}$_slaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) nwork = itau + m ! multiply transpose(q) by b. ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_sormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& , lwork-nwork+1, info ) else ! path 2 - remaining underdetermined cases. ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a. ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors. ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_sormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_slalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of a. call stdlib${ii}$_sormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) end if ! undo scaling. if( iascl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 10 continue work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork return end subroutine stdlib${ii}$_sgelsd module subroutine stdlib${ii}$_dgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, & !! DGELSD computes the minimum-norm solution to a real linear least !! squares problem: !! minimize 2-norm(| b - A*x |) !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The problem is solved in three steps: !! (1) Reduce the coefficient matrix A to bidiagonal form with !! Householder transformations, reducing the original problem !! into a "bidiagonal least squares problem" (BLS) !! (2) Solve the BLS using a divide and conquer approach. !! (3) Apply back all the Householder transformations to solve !! the original least squares problem. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(inout) :: a(lda,*), b(ldb,*) real(dp), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, & maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSD', ' ', m, n, nrhs, -1_${ik}$ ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'DGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ ) ! compute workspace. ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) minwrk = 1_${ik}$ liwork = 1_${ik}$ minmn = max( 1_${ik}$, minmn ) nlvl = max( int( log( real( minmn,KIND=dp) / real( smlsiz+1,KIND=dp) ) /log( two ),& KIND=${ik}$) + 1_${ik}$, 0_${ik}$ ) if( info==0_${ik}$ ) then maxwrk = 0_${ik}$ liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn mm = m if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns. mm = n maxwrk = max( maxwrk, n+n*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n, -1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined. maxwrk = max( maxwrk, 3_${ik}$*n+( mm+n )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', mm, n, -1_${ik}$, -& 1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', mm, nrhs, n, -& 1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*n+( n-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, n, & -1_${ik}$ ) ) wlalsd = 9_${ik}$*n+2*n*smlsiz+8*n*nlvl+n*nrhs+(smlsiz+1)**2_${ik}$ maxwrk = max( maxwrk, 3_${ik}$*n+wlalsd ) minwrk = max( 3_${ik}$*n+mm, 3_${ik}$*n+nrhs, 3_${ik}$*n+wlalsd ) end if if( n>m ) then wlalsd = 9_${ik}$*m+2*m*smlsiz+8*m*nlvl+m*nrhs+(smlsiz+1)**2_${ik}$ if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows. maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, m*m+4*m+2*m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, m, -1_${ik}$, -& 1_${ik}$ ) ) maxwrk = max( maxwrk, m*m+4*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs,& m, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m+4*m+( m-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', m, & nrhs, m, -1_${ik}$ ) ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m+m+m*nrhs ) else maxwrk = max( maxwrk, m*m+2*m ) end if maxwrk = max( maxwrk, m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m, -1_${ik}$ & ) ) maxwrk = max( maxwrk, m*m+4*m+wlalsd ) ! xxx: ensure the path 2a case below is triggered. the workspace ! calculation should use queries for all routines eventually. maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) else ! path 2 - remaining underdetermined cases. maxwrk = 3_${ik}$*m + ( n+m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n,-1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, 3_${ik}$*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs, n, & -1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*m+m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, m, -1_${ik}$ & ) ) maxwrk = max( maxwrk, 3_${ik}$*m+wlalsd ) end if minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+m, 3_${ik}$*m+wlalsd ) end if minwrk = min( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSD', -info ) return else if( lquery ) then go to 10 end if ! quick return if possible. if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters. eps = stdlib${ii}$_dlamch( 'P' ) sfmin = stdlib${ii}$_dlamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a if max entry outside range [smlnum,bignum]. anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_dlaset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ ) rank = 0_${ik}$ go to 10 end if ! scale b if max entry outside range [smlnum,bignum]. bnrm = stdlib${ii}$_dlange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! if m < n make sure certain entries of b are zero. if( m<n )call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) ! overdetermined case. if( m>=n ) then ! path 1 - overdetermined or exactly determined. mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns. mm = n itau = 1_${ik}$ nwork = itau + n ! compute a=q*r. ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & info ) ! multiply b by transpose(q). ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_dormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & nwork ), lwork-nwork+1, info ) ! zero out below r. if( n>1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a. ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_dgebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r. ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_dlalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of r. call stdlib${ii}$_dormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m, wlalsd ) ) & then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm. ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4_${ik}$*m+m*lda+& wlalsd ) )ldwork = lda itau = 1_${ik}$ nwork = m + 1_${ik}$ ! compute a=l*q. ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) il = nwork ! copy l to work(il), zeroing out above its diagonal. call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il). ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( nwork ),lwork-nwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l. ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_dlalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of l. call stdlib${ii}$_dormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! zero out below first m rows of b. call stdlib${ii}$_dlaset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) nwork = itau + m ! multiply transpose(q) by b. ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_dormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& , lwork-nwork+1, info ) else ! path 2 - remaining underdetermined cases. ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a. ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors. ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_dormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_dlalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of a. call stdlib${ii}$_dormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) end if ! undo scaling. if( iascl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 10 continue work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork return end subroutine stdlib${ii}$_dgelsd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, iwork, & !! DGELSD: computes the minimum-norm solution to a real linear least !! squares problem: !! minimize 2-norm(| b - A*x |) !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The problem is solved in three steps: !! (1) Reduce the coefficient matrix A to bidiagonal form with !! Householder transformations, reducing the original problem !! into a "bidiagonal least squares problem" (BLS) !! (2) Solve the BLS using a divide and conquer approach. !! (3) Apply back all the Householder transformations to solve !! the original least squares problem. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${rk}$), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*) real(${rk}$), intent(out) :: s(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, maxmn, & maxwrk, minmn, minwrk, mm, mnthr, nlvl, nwork, smlsiz, wlalsd real(${rk}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGELSD', ' ', m, n, nrhs, -1_${ik}$ ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'DGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ ) ! compute workspace. ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) minwrk = 1_${ik}$ liwork = 1_${ik}$ minmn = max( 1_${ik}$, minmn ) nlvl = max( int( log( real( minmn,KIND=${rk}$) / real( smlsiz+1,KIND=${rk}$) ) /log( two ),& KIND=${ik}$) + 1_${ik}$, 0_${ik}$ ) if( info==0_${ik}$ ) then maxwrk = 0_${ik}$ liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn mm = m if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns. mm = n maxwrk = max( maxwrk, n+n*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', 'LT', m, nrhs, n, -1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined. maxwrk = max( maxwrk, 3_${ik}$*n+( mm+n )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', mm, n, -1_${ik}$, -& 1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*n+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', mm, nrhs, n, -& 1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*n+( n-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, n, & -1_${ik}$ ) ) wlalsd = 9_${ik}$*n+2*n*smlsiz+8*n*nlvl+n*nrhs+(smlsiz+1)**2_${ik}$ maxwrk = max( maxwrk, 3_${ik}$*n+wlalsd ) minwrk = max( 3_${ik}$*n+mm, 3_${ik}$*n+nrhs, 3_${ik}$*n+wlalsd ) end if if( n>m ) then wlalsd = 9_${ik}$*m+2*m*smlsiz+8*m*nlvl+m*nrhs+(smlsiz+1)**2_${ik}$ if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows. maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGELQF', ' ', m, n, -1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, m*m+4*m+2*m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, m, -1_${ik}$, -& 1_${ik}$ ) ) maxwrk = max( maxwrk, m*m+4*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs,& m, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m+4*m+( m-1 )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', m, & nrhs, m, -1_${ik}$ ) ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m+m+m*nrhs ) else maxwrk = max( maxwrk, m*m+2*m ) end if maxwrk = max( maxwrk, m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', 'LT', n, nrhs, m, -1_${ik}$ & ) ) maxwrk = max( maxwrk, m*m+4*m+wlalsd ) ! xxx: ensure the path 2a case below is triggered. the workspace ! calculation should use queries for all routines eventually. maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) else ! path 2 - remaining underdetermined cases. maxwrk = 3_${ik}$*m + ( n+m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n,-1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, 3_${ik}$*m+nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'QLT', m, nrhs, n, & -1_${ik}$ ) ) maxwrk = max( maxwrk, 3_${ik}$*m+m*stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMBR', 'PLN', n, nrhs, m, -1_${ik}$ & ) ) maxwrk = max( maxwrk, 3_${ik}$*m+wlalsd ) end if minwrk = max( 3_${ik}$*m+nrhs, 3_${ik}$*m+m, 3_${ik}$*m+wlalsd ) end if minwrk = min( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGELSD', -info ) return else if( lquery ) then go to 10 end if ! quick return if possible. if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters. eps = stdlib${ii}$_${ri}$lamch( 'P' ) sfmin = stdlib${ii}$_${ri}$lamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_${ri}$labad( smlnum, bignum ) ! scale a if max entry outside range [smlnum,bignum]. anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ri}$laset( 'F', max( m, n ), nrhs, zero, zero, b, ldb ) call stdlib${ii}$_${ri}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ ) rank = 0_${ik}$ go to 10 end if ! scale b if max entry outside range [smlnum,bignum]. bnrm = stdlib${ii}$_${ri}$lange( 'M', m, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! if m < n make sure certain entries of b are zero. if( m<n )call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) ! overdetermined case. if( m>=n ) then ! path 1 - overdetermined or exactly determined. mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns. mm = n itau = 1_${ik}$ nwork = itau + n ! compute a=q*r. ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & info ) ! multiply b by transpose(q). ! (workspace: need n+nrhs, prefer n+nrhs*nb) call stdlib${ii}$_${ri}$ormqr( 'L', 'T', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & nwork ), lwork-nwork+1, info ) ! zero out below r. if( n>1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) end if end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a. ! (workspace: need 3*n+mm, prefer 3*n+(mm+n)*nb) call stdlib${ii}$_${ri}$gebrd( mm, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work(& nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r. ! (workspace: need 3*n+nrhs, prefer 3*n+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_${ri}$lalsd( 'U', smlsiz, n, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of r. call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m, wlalsd ) ) & then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm. ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs, 4_${ik}$*m+m*lda+& wlalsd ) )ldwork = lda itau = 1_${ik}$ nwork = m + 1_${ik}$ ! compute a=l*q. ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) il = nwork ! copy l to work(il), zeroing out above its diagonal. call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, work( il+ldwork ),ldwork ) ie = il + ldwork*m itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il). ! (workspace: need m*m+5*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwork, s, work( ie ),work( itauq ), work( & itaup ), work( nwork ),lwork-nwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l. ! (workspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_${ri}$lalsd( 'U', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of l. call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! zero out below first m rows of b. call stdlib${ii}$_${ri}$laset( 'F', n-m, nrhs, zero, zero, b( m+1, 1_${ik}$ ), ldb ) nwork = itau + m ! multiply transpose(q) by b. ! (workspace: need m+nrhs, prefer m+nrhs*nb) call stdlib${ii}$_${ri}$ormlq( 'L', 'T', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& , lwork-nwork+1, info ) else ! path 2 - remaining underdetermined cases. ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a. ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), work( & nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors. ! (workspace: need 3*m+nrhs, prefer 3*m+nrhs*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'T', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_${ri}$lalsd( 'L', smlsiz, m, nrhs, s, work( ie ), b, ldb,rcond, rank, work( & nwork ), iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of a. call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) end if ! undo scaling. if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 10 continue work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork return end subroutine stdlib${ii}$_${ri}$gelsd #:endif #:endfor module subroutine stdlib${ii}$_cgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! CGELSD computes the minimum-norm solution to a real linear least !! squares problem: !! minimize 2-norm(| b - A*x |) !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The problem is solved in three steps: !! (1) Reduce the coefficient matrix A to bidiagonal form with !! Householder transformations, reducing the original problem !! into a "bidiagonal least squares problem" (BLS) !! (2) Solve the BLS using a divide and conquer approach. !! (3) Apply back all the Householder transformations to solve !! the original least squares problem. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. iwork, info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(sp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(out) :: rwork(*), s(*) complex(sp), intent(inout) :: a(lda,*), b(ldb,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, & maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz real(sp) :: anrm, bignum, bnrm, eps, sfmin, smlnum ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace. ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ liwork = 1_${ik}$ lrwork = 1_${ik}$ if( minmn>0_${ik}$ ) then smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'CGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ ) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGELSD', ' ', m, n, nrhs, -1_${ik}$ ) nlvl = max( int( log( real( minmn,KIND=sp) / real( smlsiz + 1_${ik}$,KIND=sp) ) /log( & two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ ) liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn mm = m if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns. mm = n maxwrk = max( maxwrk, n*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', 'LC', m,nrhs, n, -1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined. lrwork = 10_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(& 1_${ik}$+nrhs) + 2_${ik}$*nrhs ) maxwrk = max( maxwrk, 2_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'CGEBRD', ' ', mm, n, & -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMBR','QLC', mm, nrhs, & n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'CUNMBR', 'PLN', n, & nrhs, n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + n*nrhs ) minwrk = max( 2_${ik}$*n + mm, 2_${ik}$*n + n*nrhs ) end if if( n>m ) then lrwork = 10_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(& 1_${ik}$+nrhs) + 2_${ik}$*nrhs ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows. maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'CGEBRD', ' ', m, m,& -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'CUNMBR', 'QLC', m,& nrhs, m, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'CUNMLQ', & 'LC', n, nrhs, m, -1_${ik}$ ) ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m*m + 4_${ik}$*m + m*nrhs ) ! xxx: ensure the path 2a case below is triggered. the workspace ! calculation should use queries for all routines eventually. maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) else ! path 2 - underdetermined. maxwrk = 2_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, 2_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMBR','QLC', m, nrhs,& m, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMBR','PLN', n, nrhs, m,& -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*m + m*nrhs ) end if minwrk = max( 2_${ik}$*m + n, 2_${ik}$*m + m*nrhs ) end if end if minwrk = min( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork rwork( 1_${ik}$ ) = lrwork if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGELSD', -info ) return else if( lquery ) then return end if ! quick return if possible. if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters. eps = stdlib${ii}$_slamch( 'P' ) sfmin = stdlib${ii}$_slamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a if max entry outside range [smlnum,bignum]. anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_claset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_slaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ ) rank = 0_${ik}$ go to 10 end if ! scale b if max entry outside range [smlnum,bignum]. bnrm = stdlib${ii}$_clange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! if m < n make sure b(m+1:n,:) = 0 if( m<n )call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) ! overdetermined case. if( m>=n ) then ! path 1 - overdetermined or exactly determined. mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ nwork = itau + n ! compute a=q*r. ! (rworkspace: need n) ! (cworkspace: need n, prefer n*nb) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & info ) ! multiply b by transpose(q). ! (rworkspace: need n) ! (cworkspace: need nrhs, prefer nrhs*nb) call stdlib${ii}$_cunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & nwork ), lwork-nwork+1, info ) ! zero out below r. if( n>1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if end if itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ie = 1_${ik}$ nrwork = ie + n ! bidiagonalize r in a. ! (rworkspace: need n) ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) call stdlib${ii}$_cgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r. ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_clalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of r. call stdlib${ii}$_cunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm. ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ nwork = m + 1_${ik}$ ! compute a=l*q. ! (cworkspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) il = nwork ! copy l to work(il), zeroing out above its diagonal. call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) itauq = il + ldwork*m itaup = itauq + m nwork = itaup + m ie = 1_${ik}$ nrwork = ie + m ! bidiagonalize l in work(il). ! (rworkspace: need m) ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_cgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( nwork ),lwork-nwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l. ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_clalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of l. call stdlib${ii}$_cunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! zero out below first m rows of b. call stdlib${ii}$_claset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) nwork = itau + m ! multiply transpose(q) by b. ! (cworkspace: need nrhs, prefer nrhs*nb) call stdlib${ii}$_cunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& , lwork-nwork+1, info ) else ! path 2 - remaining underdetermined cases. itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ie = 1_${ik}$ nrwork = ie + m ! bidiagonalize a. ! (rworkspace: need m) ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors. ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) call stdlib${ii}$_cunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_clalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of a. call stdlib${ii}$_cunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) end if ! undo scaling. if( iascl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 10 continue work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork rwork( 1_${ik}$ ) = lrwork return end subroutine stdlib${ii}$_cgelsd module subroutine stdlib${ii}$_zgelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! ZGELSD computes the minimum-norm solution to a real linear least !! squares problem: !! minimize 2-norm(| b - A*x |) !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The problem is solved in three steps: !! (1) Reduce the coefficient matrix A to bidiagonal form with !! Householder transformations, reducing the original problem !! into a "bidiagonal least squares problem" (BLS) !! (2) Solve the BLS using a divide and conquer approach. !! (3) Apply back all the Householder transformations to solve !! the original least squares problem. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. iwork, info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(dp), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(out) :: rwork(*), s(*) complex(dp), intent(inout) :: a(lda,*), b(ldb,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, & maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz real(dp) :: anrm, bignum, bnrm, eps, sfmin, smlnum ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace. ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ liwork = 1_${ik}$ lrwork = 1_${ik}$ if( minmn>0_${ik}$ ) then smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'ZGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ ) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSD', ' ', m, n, nrhs, -1_${ik}$ ) nlvl = max( int( log( real( minmn,KIND=dp) / real( smlsiz + 1_${ik}$,KIND=dp) ) /log( & two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ ) liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn mm = m if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns. mm = n maxwrk = max( maxwrk, n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m,nrhs, n, -1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined. lrwork = 10_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(& 1_${ik}$+nrhs) + 2_${ik}$*nrhs ) maxwrk = max( maxwrk, 2_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', mm, n, & -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', mm, nrhs, & n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'PLN', n, & nrhs, n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + n*nrhs ) minwrk = max( 2_${ik}$*n + mm, 2_${ik}$*n + n*nrhs ) end if if( n>m ) then lrwork = 10_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(& 1_${ik}$+nrhs) + 2_${ik}$*nrhs ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows. maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', m, m,& -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'QLC', m,& nrhs, m, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMLQ', & 'LC', n, nrhs, m, -1_${ik}$ ) ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m*m + 4_${ik}$*m + m*nrhs ) ! xxx: ensure the path 2a case below is triggered. the workspace ! calculation should use queries for all routines eventually. maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) else ! path 2 - underdetermined. maxwrk = 2_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, 2_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', m, nrhs,& m, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','PLN', n, nrhs, m,& -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*m + m*nrhs ) end if minwrk = max( 2_${ik}$*m + n, 2_${ik}$*m + m*nrhs ) end if end if minwrk = min( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork rwork( 1_${ik}$ ) = lrwork if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSD', -info ) return else if( lquery ) then return end if ! quick return if possible. if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters. eps = stdlib${ii}$_dlamch( 'P' ) sfmin = stdlib${ii}$_dlamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a if max entry outside range [smlnum,bignum]. anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_zlaset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_dlaset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ ) rank = 0_${ik}$ go to 10 end if ! scale b if max entry outside range [smlnum,bignum]. bnrm = stdlib${ii}$_zlange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! if m < n make sure b(m+1:n,:) = 0 if( m<n )call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) ! overdetermined case. if( m>=n ) then ! path 1 - overdetermined or exactly determined. mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ nwork = itau + n ! compute a=q*r. ! (rworkspace: need n) ! (cworkspace: need n, prefer n*nb) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & info ) ! multiply b by transpose(q). ! (rworkspace: need n) ! (cworkspace: need nrhs, prefer nrhs*nb) call stdlib${ii}$_zunmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & nwork ), lwork-nwork+1, info ) ! zero out below r. if( n>1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if end if itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ie = 1_${ik}$ nrwork = ie + n ! bidiagonalize r in a. ! (rworkspace: need n) ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) call stdlib${ii}$_zgebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r. ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_zlalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of r. call stdlib${ii}$_zunmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm. ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ nwork = m + 1_${ik}$ ! compute a=l*q. ! (cworkspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) il = nwork ! copy l to work(il), zeroing out above its diagonal. call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) itauq = il + ldwork*m itaup = itauq + m nwork = itaup + m ie = 1_${ik}$ nrwork = ie + m ! bidiagonalize l in work(il). ! (rworkspace: need m) ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_zgebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( nwork ),lwork-nwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l. ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_zlalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of l. call stdlib${ii}$_zunmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! zero out below first m rows of b. call stdlib${ii}$_zlaset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) nwork = itau + m ! multiply transpose(q) by b. ! (cworkspace: need nrhs, prefer nrhs*nb) call stdlib${ii}$_zunmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& , lwork-nwork+1, info ) else ! path 2 - remaining underdetermined cases. itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ie = 1_${ik}$ nrwork = ie + m ! bidiagonalize a. ! (rworkspace: need m) ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors. ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) call stdlib${ii}$_zunmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_zlalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of a. call stdlib${ii}$_zunmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) end if ! undo scaling. if( iascl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 10 continue work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork rwork( 1_${ik}$ ) = lrwork return end subroutine stdlib${ii}$_zgelsd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gelsd( m, n, nrhs, a, lda, b, ldb, s, rcond, rank,work, lwork, rwork, & !! ZGELSD: computes the minimum-norm solution to a real linear least !! squares problem: !! minimize 2-norm(| b - A*x |) !! using the singular value decomposition (SVD) of A. A is an M-by-N !! matrix which may be rank-deficient. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. !! The problem is solved in three steps: !! (1) Reduce the coefficient matrix A to bidiagonal form with !! Householder transformations, reducing the original problem !! into a "bidiagonal least squares problem" (BLS) !! (2) Solve the BLS using a divide and conquer approach. !! (3) Apply back all the Householder transformations to solve !! the original least squares problem. !! The effective rank of A is determined by treating as zero those !! singular values which are less than RCOND times the largest singular !! value. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. iwork, info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, rank integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs real(${ck}$), intent(in) :: rcond ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${ck}$), intent(out) :: rwork(*), s(*) complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: iascl, ibscl, ie, il, itau, itaup, itauq, ldwork, liwork, lrwork, & maxmn, maxwrk, minmn, minwrk, mm, mnthr, nlvl, nrwork, nwork, smlsiz real(${ck}$) :: anrm, bignum, bnrm, eps, sfmin, smlnum ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ minmn = min( m, n ) maxmn = max( m, n ) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( nrhs<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldb<max( 1_${ik}$, maxmn ) ) then info = -7_${ik}$ end if ! compute workspace. ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ liwork = 1_${ik}$ lrwork = 1_${ik}$ if( minmn>0_${ik}$ ) then smlsiz = stdlib${ii}$_ilaenv( 9_${ik}$, 'ZGELSD', ' ', 0_${ik}$, 0_${ik}$, 0_${ik}$, 0_${ik}$ ) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGELSD', ' ', m, n, nrhs, -1_${ik}$ ) nlvl = max( int( log( real( minmn,KIND=${ck}$) / real( smlsiz + 1_${ik}$,KIND=${ck}$) ) /log( & two ),KIND=${ik}$) + 1_${ik}$, 0_${ik}$ ) liwork = 3_${ik}$*minmn*nlvl + 11_${ik}$*minmn mm = m if( m>=n .and. m>=mnthr ) then ! path 1a - overdetermined, with many more rows than ! columns. mm = n maxwrk = max( maxwrk, n*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEQRF', ' ', m, n,-1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', 'LC', m,nrhs, n, -1_${ik}$ ) ) end if if( m>=n ) then ! path 1 - overdetermined or exactly determined. lrwork = 10_${ik}$*n + 2_${ik}$*n*smlsiz + 8_${ik}$*n*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(& 1_${ik}$+nrhs) + 2_${ik}$*nrhs ) maxwrk = max( maxwrk, 2_${ik}$*n + ( mm + n )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', mm, n, & -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', mm, nrhs, & n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + ( n - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'PLN', n, & nrhs, n, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*n + n*nrhs ) minwrk = max( 2_${ik}$*n + mm, 2_${ik}$*n + n*nrhs ) end if if( n>m ) then lrwork = 10_${ik}$*m + 2_${ik}$*m*smlsiz + 8_${ik}$*m*nlvl + 3_${ik}$*smlsiz*nrhs +max( (smlsiz+1)**2_${ik}$, n*(& 1_${ik}$+nrhs) + 2_${ik}$*nrhs ) if( n>=mnthr ) then ! path 2a - underdetermined, with many more columns ! than rows. maxwrk = m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGELQF', ' ', m, n, -1_${ik}$,-1_${ik}$ ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + 2_${ik}$*m*stdlib${ii}$_ilaenv( 1_${ik}$,'ZGEBRD', ' ', m, m,& -1_${ik}$, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMBR', 'QLC', m,& nrhs, m, -1_${ik}$ ) ) maxwrk = max( maxwrk, m*m + 4_${ik}$*m + ( m - 1_${ik}$ )*stdlib${ii}$_ilaenv( 1_${ik}$,'ZUNMLQ', & 'LC', n, nrhs, m, -1_${ik}$ ) ) if( nrhs>1_${ik}$ ) then maxwrk = max( maxwrk, m*m + m + m*nrhs ) else maxwrk = max( maxwrk, m*m + 2_${ik}$*m ) end if maxwrk = max( maxwrk, m*m + 4_${ik}$*m + m*nrhs ) ! xxx: ensure the path 2a case below is triggered. the workspace ! calculation should use queries for all routines eventually. maxwrk = max( maxwrk,4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) else ! path 2 - underdetermined. maxwrk = 2_${ik}$*m + ( n + m )*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m,n, -1_${ik}$, -1_${ik}$ ) maxwrk = max( maxwrk, 2_${ik}$*m + nrhs*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','QLC', m, nrhs,& m, -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*m + m*stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMBR','PLN', n, nrhs, m,& -1_${ik}$ ) ) maxwrk = max( maxwrk, 2_${ik}$*m + m*nrhs ) end if minwrk = max( 2_${ik}$*m + n, 2_${ik}$*m + m*nrhs ) end if end if minwrk = min( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork rwork( 1_${ik}$ ) = lrwork if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGELSD', -info ) return else if( lquery ) then return end if ! quick return if possible. if( m==0_${ik}$ .or. n==0_${ik}$ ) then rank = 0_${ik}$ return end if ! get machine parameters. eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) sfmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) smlnum = sfmin / eps bignum = one / smlnum call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum ) ! scale a if max entry outside range [smlnum,bignum]. anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ci}$laset( 'F', max( m, n ), nrhs, czero, czero, b, ldb ) call stdlib${ii}$_${c2ri(ci)}$laset( 'F', minmn, 1_${ik}$, zero, zero, s, 1_${ik}$ ) rank = 0_${ik}$ go to 10 end if ! scale b if max entry outside range [smlnum,bignum]. bnrm = stdlib${ii}$_${ci}$lange( 'M', m, nrhs, b, ldb, rwork ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum. call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, m, nrhs, b, ldb, info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum. call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, m, nrhs, b, ldb, info ) ibscl = 2_${ik}$ end if ! if m < n make sure b(m+1:n,:) = 0 if( m<n )call stdlib${ii}$_${ci}$laset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) ! overdetermined case. if( m>=n ) then ! path 1 - overdetermined or exactly determined. mm = m if( m>=mnthr ) then ! path 1a - overdetermined, with many more rows than columns mm = n itau = 1_${ik}$ nwork = itau + n ! compute a=q*r. ! (rworkspace: need n) ! (cworkspace: need n, prefer n*nb) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & info ) ! multiply b by transpose(q). ! (rworkspace: need n) ! (cworkspace: need nrhs, prefer nrhs*nb) call stdlib${ii}$_${ci}$unmqr( 'L', 'C', m, nrhs, n, a, lda, work( itau ), b,ldb, work( & nwork ), lwork-nwork+1, info ) ! zero out below r. if( n>1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if end if itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ie = 1_${ik}$ nrwork = ie + n ! bidiagonalize r in a. ! (rworkspace: need n) ! (cworkspace: need 2*n+mm, prefer 2*n+(mm+n)*nb) call stdlib${ii}$_${ci}$gebrd( mm, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors of r. ! (cworkspace: need 2*n+nrhs, prefer 2*n+nrhs*nb) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', mm, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_${ci}$lalsd( 'U', smlsiz, n, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of r. call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'N', n, nrhs, n, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) else if( n>=mnthr .and. lwork>=4_${ik}$*m+m*m+max( m, 2_${ik}$*m-4, nrhs, n-3*m ) ) then ! path 2a - underdetermined, with many more columns than rows ! and sufficient workspace for an efficient algorithm. ldwork = m if( lwork>=max( 4_${ik}$*m+m*lda+max( m, 2_${ik}$*m-4, nrhs, n-3*m ),m*lda+m+m*nrhs ) )ldwork = & lda itau = 1_${ik}$ nwork = m + 1_${ik}$ ! compute a=l*q. ! (cworkspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, info ) il = nwork ! copy l to work(il), zeroing out above its diagonal. call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( il ), ldwork ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, work( il+ldwork ),ldwork ) itauq = il + ldwork*m itaup = itauq + m nwork = itaup + m ie = 1_${ik}$ nrwork = ie + m ! bidiagonalize l in work(il). ! (rworkspace: need m) ! (cworkspace: need m*m+4*m, prefer m*m+4*m+2*m*nb) call stdlib${ii}$_${ci}$gebrd( m, m, work( il ), ldwork, s, rwork( ie ),work( itauq ), work( & itaup ), work( nwork ),lwork-nwork+1, info ) ! multiply b by transpose of left bidiagonalizing vectors of l. ! (cworkspace: need m*m+4*m+nrhs, prefer m*m+4*m+nrhs*nb) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, m, work( il ), ldwork,work( itauq ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_${ci}$lalsd( 'U', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of l. call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'N', m, nrhs, m, work( il ), ldwork,work( itaup ), b, & ldb, work( nwork ),lwork-nwork+1, info ) ! zero out below first m rows of b. call stdlib${ii}$_${ci}$laset( 'F', n-m, nrhs, czero, czero, b( m+1, 1_${ik}$ ), ldb ) nwork = itau + m ! multiply transpose(q) by b. ! (cworkspace: need nrhs, prefer nrhs*nb) call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, nrhs, m, a, lda, work( itau ), b,ldb, work( nwork )& , lwork-nwork+1, info ) else ! path 2 - remaining underdetermined cases. itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ie = 1_${ik}$ nrwork = ie + m ! bidiagonalize a. ! (rworkspace: need m) ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), work(& nwork ), lwork-nwork+1,info ) ! multiply b by transpose of left bidiagonalizing vectors. ! (cworkspace: need 2*m+nrhs, prefer 2*m+nrhs*nb) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'C', m, nrhs, n, a, lda, work( itauq ),b, ldb, work( & nwork ), lwork-nwork+1, info ) ! solve the bidiagonal least squares problem. call stdlib${ii}$_${ci}$lalsd( 'L', smlsiz, m, nrhs, s, rwork( ie ), b, ldb,rcond, rank, work( & nwork ), rwork( nrwork ),iwork, info ) if( info/=0_${ik}$ ) then go to 10 end if ! multiply b by right bidiagonalizing vectors of a. call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'N', n, nrhs, m, a, lda, work( itaup ),b, ldb, work( & nwork ), lwork-nwork+1, info ) end if ! undo scaling. if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, n, nrhs, b, ldb, info ) call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, n, nrhs, b, ldb, info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, n, nrhs, b, ldb, info ) end if 10 continue work( 1_${ik}$ ) = maxwrk iwork( 1_${ik}$ ) = liwork rwork( 1_${ik}$ ) = lrwork return end subroutine stdlib${ii}$_${ci}$gelsd #:endif #:endfor module subroutine stdlib${ii}$_sgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) !! SGETSLS solves overdetermined or underdetermined real linear systems !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'T' and m >= n: find the minimum norm solution of !! an undetermined system A**T * X = B. !! 4. If TRANS = 'T' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments real(sp), intent(inout) :: a(lda,*), b(ldb,*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tran integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & lw2, wsizeo, wsizem, info2 real(sp) :: anrm, bignum, bnrm, smlnum, tq(5_${ik}$), workq(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ maxmn = max( m, n ) tran = stdlib_lsame( trans, 'T' ) lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ end if if( info==0_${ik}$ ) then ! determine the optimum and minimum lwork if( m>=n ) then call stdlib${ii}$_sgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_sgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_sgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_sgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm else call stdlib${ii}$_sgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_sgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_sgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_sgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm end if if( ( lwork<wsizem ).and.( .not.lquery ) ) then info = -10_${ik}$ end if work( 1_${ik}$ ) = real( wsizeo,KIND=sp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGETSLS', -info ) return end if if( lquery ) then if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=sp) return end if if( lwork<wsizeo ) then lw1 = tszm lw2 = lwm else lw1 = tszo lw2 = lwo end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_slaset( 'FULL', max( m, n ), nrhs, zero, zero,b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_slaset( 'F', maxmn, nrhs, zero, zero, b, ldb ) go to 50 end if brow = m if ( tran ) then brow = n end if bnrm = stdlib${ii}$_slange( 'M', brow, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if ( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_sgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) if ( .not.tran ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_sgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& 1_${ik}$ ), lw2,info ) ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_strtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = n else ! overdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_strtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = zero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_sgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_sgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) ! workspace at least m, optimally m*nb. if( .not.tran ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_strtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_sgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_sgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_strtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( tszo + lwo,KIND=sp) return end subroutine stdlib${ii}$_sgetsls module subroutine stdlib${ii}$_dgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) !! DGETSLS solves overdetermined or underdetermined real linear systems !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'T' and m >= n: find the minimum norm solution of !! an undetermined system A**T * X = B. !! 4. If TRANS = 'T' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments real(dp), intent(inout) :: a(lda,*), b(ldb,*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tran integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & lw2, wsizeo, wsizem, info2 real(dp) :: anrm, bignum, bnrm, smlnum, tq(5_${ik}$), workq(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ maxmn = max( m, n ) tran = stdlib_lsame( trans, 'T' ) lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ end if if( info==0_${ik}$ ) then ! determine the optimum and minimum lwork if( m>=n ) then call stdlib${ii}$_dgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_dgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_dgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_dgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm else call stdlib${ii}$_dgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_dgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_dgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_dgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm end if if( ( lwork<wsizem ).and.( .not.lquery ) ) then info = -10_${ik}$ end if work( 1_${ik}$ ) = real( wsizeo,KIND=dp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGETSLS', -info ) return end if if( lquery ) then if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=dp) return end if if( lwork<wsizeo ) then lw1 = tszm lw2 = lwm else lw1 = tszo lw2 = lwo end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_dlaset( 'FULL', max( m, n ), nrhs, zero, zero,b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_dlaset( 'F', maxmn, nrhs, zero, zero, b, ldb ) go to 50 end if brow = m if ( tran ) then brow = n end if bnrm = stdlib${ii}$_dlange( 'M', brow, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if ( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_dgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) if ( .not.tran ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_dgemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& 1_${ik}$ ), lw2,info ) ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_dtrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = n else ! overdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_dtrtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = zero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_dgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_dgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) ! workspace at least m, optimally m*nb. if( .not.tran ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_dtrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_dgemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_dgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_dtrtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( tszo + lwo,KIND=dp) return end subroutine stdlib${ii}$_dgetsls #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$getsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) !! DGETSLS: solves overdetermined or underdetermined real linear systems !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'T' and m >= n: find the minimum norm solution of !! an undetermined system A**T * X = B. !! 4. If TRANS = 'T' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tran integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & lw2, wsizeo, wsizem, info2 real(${rk}$) :: anrm, bignum, bnrm, smlnum, tq(5_${ik}$), workq(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ maxmn = max( m, n ) tran = stdlib_lsame( trans, 'T' ) lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'T' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ end if if( info==0_${ik}$ ) then ! determine the optimum and minimum lwork if( m>=n ) then call stdlib${ii}$_${ri}$geqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ri}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_${ri}$geqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ri}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm else call stdlib${ii}$_${ri}$gelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ri}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_${ri}$gelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ri}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm end if if( ( lwork<wsizem ).and.( .not.lquery ) ) then info = -10_${ik}$ end if work( 1_${ik}$ ) = real( wsizeo,KIND=${rk}$) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGETSLS', -info ) return end if if( lquery ) then if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=${rk}$) return end if if( lwork<wsizeo ) then lw1 = tszm lw2 = lwm else lw1 = tszo lw2 = lwo end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'FULL', max( m, n ), nrhs, zero, zero,b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / stdlib${ii}$_${ri}$lamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_${ri}$labad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, work ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ri}$laset( 'F', maxmn, nrhs, zero, zero, b, ldb ) go to 50 end if brow = m if ( tran ) then brow = n end if bnrm = stdlib${ii}$_${ri}$lange( 'M', brow, nrhs, b, ldb, work ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if ( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_${ri}$geqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) if ( .not.tran ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$gemqr( 'L' , 'T', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& 1_${ik}$ ), lw2,info ) ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = n else ! overdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'U', 'T', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = zero do j = 1, nrhs do i = n + 1, m b( i, j ) = zero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$gemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_${ri}$gelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) ! workspace at least m, optimally m*nb. if( .not.tran ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = zero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$gemlq( 'L', 'T', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_${ri}$gemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_${ri}$trtrs( 'LOWER', 'TRANSPOSE', 'NON-UNIT', m, nrhs,a, lda, b, ldb, & info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( tszo + lwo,KIND=${rk}$) return end subroutine stdlib${ii}$_${ri}$getsls #:endif #:endfor module subroutine stdlib${ii}$_cgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) !! CGETSLS solves overdetermined or underdetermined complex linear systems !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'C' and m >= n: find the minimum norm solution of !! an undetermined system A**T * X = B. !! 4. If TRANS = 'C' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments complex(sp), intent(inout) :: a(lda,*), b(ldb,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tran integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & lw2, wsizeo, wsizem, info2 real(sp) :: anrm, bignum, bnrm, smlnum, dum(1_${ik}$) complex(sp) :: tq(5_${ik}$), workq(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ maxmn = max( m, n ) tran = stdlib_lsame( trans, 'C' ) lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ end if if( info==0_${ik}$ ) then ! determine the optimum and minimum lwork if( m>=n ) then call stdlib${ii}$_cgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_cgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_cgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_cgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm else call stdlib${ii}$_cgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_cgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_cgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_cgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm end if if( ( lwork<wsizem ).and.( .not.lquery ) ) then info = -10_${ik}$ end if work( 1_${ik}$ ) = real( wsizeo,KIND=sp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGETSLS', -info ) return end if if( lquery ) then if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=sp) return end if if( lwork<wsizeo ) then lw1 = tszm lw2 = lwm else lw1 = tszo lw2 = lwo end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_claset( 'FULL', max( m, n ), nrhs, czero, czero,b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_slamch( 'S' ) / stdlib${ii}$_slamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_slabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, dum ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_claset( 'F', maxmn, nrhs, czero, czero, b, ldb ) go to 50 end if brow = m if ( tran ) then brow = n end if bnrm = stdlib${ii}$_clange( 'M', brow, nrhs, b, ldb, dum ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if ( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_cgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) if ( .not.tran ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_cgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& 1_${ik}$ ), lw2,info ) ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_ctrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = n else ! overdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_ctrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = czero do j = 1, nrhs do i = n + 1, m b( i, j ) = czero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_cgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_cgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) ! workspace at least m, optimally m*nb. if( .not.tran ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_ctrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_cgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_cgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_ctrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( tszo + lwo,KIND=sp) return end subroutine stdlib${ii}$_cgetsls module subroutine stdlib${ii}$_zgetsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) !! ZGETSLS solves overdetermined or underdetermined complex linear systems !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'C' and m >= n: find the minimum norm solution of !! an undetermined system A**T * X = B. !! 4. If TRANS = 'C' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments complex(dp), intent(inout) :: a(lda,*), b(ldb,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tran integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & lw2, wsizeo, wsizem, info2 real(dp) :: anrm, bignum, bnrm, smlnum, dum(1_${ik}$) complex(dp) :: tq(5_${ik}$), workq(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ maxmn = max( m, n ) tran = stdlib_lsame( trans, 'C' ) lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ end if if( info==0_${ik}$ ) then ! determine the optimum and minimum lwork if( m>=n ) then call stdlib${ii}$_zgeqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_zgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_zgeqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_zgemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm else call stdlib${ii}$_zgelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_zgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_zgelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_zgemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm end if if( ( lwork<wsizem ).and.( .not.lquery ) ) then info = -10_${ik}$ end if work( 1_${ik}$ ) = real( wsizeo,KIND=dp) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGETSLS', -info ) return end if if( lquery ) then if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=dp) return end if if( lwork<wsizeo ) then lw1 = tszm lw2 = lwm else lw1 = tszo lw2 = lwo end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_zlaset( 'FULL', max( m, n ), nrhs, czero, czero,b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_dlamch( 'S' ) / stdlib${ii}$_dlamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_dlabad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, dum ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_zlaset( 'F', maxmn, nrhs, czero, czero, b, ldb ) go to 50 end if brow = m if ( tran ) then brow = n end if bnrm = stdlib${ii}$_zlange( 'M', brow, nrhs, b, ldb, dum ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if ( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_zgeqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) if ( .not.tran ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_zgemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& 1_${ik}$ ), lw2,info ) ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_ztrtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = n else ! overdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_ztrtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = czero do j = 1, nrhs do i = n + 1, m b( i, j ) = czero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_zgemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_zgelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) ! workspace at least m, optimally m*nb. if( .not.tran ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_ztrtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_zgemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_zgemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_ztrtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( tszo + lwo,KIND=dp) return end subroutine stdlib${ii}$_zgetsls #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$getsls( trans, m, n, nrhs, a, lda, b, ldb,work, lwork, info ) !! ZGETSLS: solves overdetermined or underdetermined complex linear systems !! involving an M-by-N matrix A, using a tall skinny QR or short wide LQ !! factorization of A. It is assumed that A has full rank. !! The following options are provided: !! 1. If TRANS = 'N' and m >= n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A*X ||. !! 2. If TRANS = 'N' and m < n: find the minimum norm solution of !! an underdetermined system A * X = B. !! 3. If TRANS = 'C' and m >= n: find the minimum norm solution of !! an undetermined system A**T * X = B. !! 4. If TRANS = 'C' and m < n: find the least squares solution of !! an overdetermined system, i.e., solve the least squares problem !! minimize || B - A**T * X ||. !! Several right hand side vectors b and solution vectors x can be !! handled in a single call; they are stored as the columns of the !! M-by-NRHS right hand side matrix B and the N-by-NRHS solution !! matrix X. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldb, lwork, m, n, nrhs ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, tran integer(${ik}$) :: i, iascl, ibscl, j, maxmn, brow, scllen, tszo, tszm, lwo, lwm, lw1, & lw2, wsizeo, wsizem, info2 real(${ck}$) :: anrm, bignum, bnrm, smlnum, dum(1_${ik}$) complex(${ck}$) :: tq(5_${ik}$), workq(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments. info = 0_${ik}$ maxmn = max( m, n ) tran = stdlib_lsame( trans, 'C' ) lquery = ( lwork==-1_${ik}$ .or. lwork==-2_${ik}$ ) if( .not.( stdlib_lsame( trans, 'N' ) .or.stdlib_lsame( trans, 'C' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( nrhs<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldb<max( 1_${ik}$, m, n ) ) then info = -8_${ik}$ end if if( info==0_${ik}$ ) then ! determine the optimum and minimum lwork if( m>=n ) then call stdlib${ii}$_${ci}$geqr( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ci}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_${ci}$geqr( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ci}$gemqr( 'L', trans, m, nrhs, n, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm else call stdlib${ii}$_${ci}$gelq( m, n, a, lda, tq, -1_${ik}$, workq, -1_${ik}$, info2 ) tszo = int( tq( 1_${ik}$ ),KIND=${ik}$) lwo = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ci}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszo, b, ldb, workq, -1_${ik}$, & info2 ) lwo = max( lwo, int( workq( 1_${ik}$ ),KIND=${ik}$) ) call stdlib${ii}$_${ci}$gelq( m, n, a, lda, tq, -2_${ik}$, workq, -2_${ik}$, info2 ) tszm = int( tq( 1_${ik}$ ),KIND=${ik}$) lwm = int( workq( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ci}$gemlq( 'L', trans, n, nrhs, m, a, lda, tq,tszm, b, ldb, workq, -1_${ik}$, & info2 ) lwm = max( lwm, int( workq( 1_${ik}$ ),KIND=${ik}$) ) wsizeo = tszo + lwo wsizem = tszm + lwm end if if( ( lwork<wsizem ).and.( .not.lquery ) ) then info = -10_${ik}$ end if work( 1_${ik}$ ) = real( wsizeo,KIND=${ck}$) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGETSLS', -info ) return end if if( lquery ) then if( lwork==-2_${ik}$ ) work( 1_${ik}$ ) = real( wsizem,KIND=${ck}$) return end if if( lwork<wsizeo ) then lw1 = tszm lw2 = lwm else lw1 = tszo lw2 = lwo end if ! quick return if possible if( min( m, n, nrhs )==0_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'FULL', max( m, n ), nrhs, czero, czero,b, ldb ) return end if ! get machine parameters smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) bignum = one / smlnum call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum ) ! scale a, b if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, dum ) iascl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, info ) iascl = 1_${ik}$ else if( anrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, info ) iascl = 2_${ik}$ else if( anrm==zero ) then ! matrix all zero. return zero solution. call stdlib${ii}$_${ci}$laset( 'F', maxmn, nrhs, czero, czero, b, ldb ) go to 50 end if brow = m if ( tran ) then brow = n end if bnrm = stdlib${ii}$_${ci}$lange( 'M', brow, nrhs, b, ldb, dum ) ibscl = 0_${ik}$ if( bnrm>zero .and. bnrm<smlnum ) then ! scale matrix norm up to smlnum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, smlnum, brow, nrhs, b, ldb,info ) ibscl = 1_${ik}$ else if( bnrm>bignum ) then ! scale matrix norm down to bignum call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bnrm, bignum, brow, nrhs, b, ldb,info ) ibscl = 2_${ik}$ end if if ( m>=n ) then ! compute qr factorization of a call stdlib${ii}$_${ci}$geqr( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) if ( .not.tran ) then ! least-squares problem min || a * x - b || ! b(1:m,1:nrhs) := q**t * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$gemqr( 'L' , 'C', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, work(& 1_${ik}$ ), lw2,info ) ! b(1:n,1:nrhs) := inv(r) * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'U', 'N', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = n else ! overdetermined system of equations a**t * x = b ! b(1:n,1:nrhs) := inv(r**t) * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'U', 'C', 'N', n, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(n+1:m,1:nrhs) = czero do j = 1, nrhs do i = n + 1, m b( i, j ) = czero end do end do ! b(1:m,1:nrhs) := q(1:n,:) * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$gemqr( 'L', 'N', m, nrhs, n, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) scllen = m end if else ! compute lq factorization of a call stdlib${ii}$_${ci}$gelq( m, n, a, lda, work( lw2+1 ), lw1,work( 1_${ik}$ ), lw2, info ) ! workspace at least m, optimally m*nb. if( .not.tran ) then ! underdetermined system of equations a * x = b ! b(1:m,1:nrhs) := inv(l) * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'L', 'N', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if ! b(m+1:n,1:nrhs) = 0 do j = 1, nrhs do i = m + 1, n b( i, j ) = czero end do end do ! b(1:n,1:nrhs) := q(1:n,:)**t * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$gemlq( 'L', 'C', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb scllen = n else ! overdetermined system min || a**t * x - b || ! b(1:n,1:nrhs) := q * b(1:n,1:nrhs) call stdlib${ii}$_${ci}$gemlq( 'L', 'N', n, nrhs, m, a, lda,work( lw2+1 ), lw1, b, ldb, & work( 1_${ik}$ ), lw2,info ) ! workspace at least nrhs, optimally nrhs*nb ! b(1:m,1:nrhs) := inv(l**t) * b(1:m,1:nrhs) call stdlib${ii}$_${ci}$trtrs( 'L', 'C', 'N', m, nrhs,a, lda, b, ldb, info ) if( info>0_${ik}$ ) then return end if scllen = m end if end if ! undo scaling if( iascl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, scllen, nrhs, b, ldb,info ) else if( iascl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, scllen, nrhs, b, ldb,info ) end if if( ibscl==1_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, bnrm, scllen, nrhs, b, ldb,info ) else if( ibscl==2_${ik}$ ) then call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, bnrm, scllen, nrhs, b, ldb,info ) end if 50 continue work( 1_${ik}$ ) = real( tszo + lwo,KIND=${ck}$) return end subroutine stdlib${ii}$_${ci}$getsls #:endif #:endfor #:endfor end submodule stdlib_lapack_lsq