#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_drivers implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, info ) !! SGESVD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m, lwork_sgebrd, & lwork_sorgbr_p, lwork_sorgbr_q, lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_sgeqrf call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgqr call stdlib${ii}$_sorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr q call stdlib${ii}$_sorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_sgeqrf maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n+wrkbl, n*n+m*n+n ) minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n+wrkbl, n*n+m*n+n ) minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_sgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q ) end if if( wntua ) then call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n+m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_sgelqf call stdlib${ii}$_sgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorglq call stdlib${ii}$_sorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr q call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_sgelqf maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m+wrkbl, m*m+m*n+m ) minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m+wrkbl, m*m+m*n+m ) minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) maxwrk = max( maxwrk, minwrk ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_sgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p ) end if if( wntva ) then call stdlib${ii}$_sorgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m+n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_slacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n+2*n, prefer n*n+m*n+n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n+4*n-1, prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n+2*n, prefer n*n+m*n+n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n+4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n+4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n+4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n+4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n+4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n+4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n+ncu, prefer 3*n+ncu*nb) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_slacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m+2*m, prefer m*m+m*n+m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m+2*m, prefer m*m+m*n+m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m+4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m+4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m+4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m+4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m+4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m+4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m+nrvt, prefer 3*m+nrvt*nb) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_sbdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_sgesvd module subroutine stdlib${ii}$_dgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) !! DGESVD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_dgeqrf, lwork_dorgqr_n, lwork_dorgqr_m, lwork_dgebrd, & lwork_dorgbr_p, lwork_dorgbr_q, lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_dgeqrf call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgqr call stdlib${ii}$_dorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr q call stdlib${ii}$_dorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_dgeqrf maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_dgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q ) end if if( wntua ) then call stdlib${ii}$_dorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n + m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_dgelqf call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorglq call stdlib${ii}$_dorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1