#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_drivers implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, info ) !! SGESVD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m, lwork_sgebrd, & lwork_sorgbr_p, lwork_sorgbr_q, lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_sgeqrf call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgqr call stdlib${ii}$_sorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr q call stdlib${ii}$_sorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_sgeqrf maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n+wrkbl, n*n+m*n+n ) minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n+wrkbl, n*n+m*n+n ) minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_sgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q ) end if if( wntua ) then call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n+m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_sgelqf call stdlib${ii}$_sgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorglq call stdlib${ii}$_sorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr q call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_sgelqf maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m+wrkbl, m*m+m*n+m ) minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m+wrkbl, m*m+m*n+m ) minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) maxwrk = max( maxwrk, minwrk ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_sgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p ) end if if( wntva ) then call stdlib${ii}$_sorgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m+n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_slacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n+2*n, prefer n*n+m*n+n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n+4*n-1, prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n+2*n, prefer n*n+m*n+n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n+4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n+4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n+4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n+4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n+4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n+4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n+ncu, prefer 3*n+ncu*nb) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_slacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m+2*m, prefer m*m+m*n+m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m+2*m, prefer m*m+m*n+m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m+4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m+4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m+4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m+4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m+4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m+4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m+nrvt, prefer 3*m+nrvt*nb) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_sbdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_sgesvd module subroutine stdlib${ii}$_dgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) !! DGESVD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_dgeqrf, lwork_dorgqr_n, lwork_dorgqr_m, lwork_dgebrd, & lwork_dorgbr_p, lwork_dorgbr_q, lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_dgeqrf call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgqr call stdlib${ii}$_dorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr q call stdlib${ii}$_dorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_dgeqrf maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_dgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q ) end if if( wntua ) then call stdlib${ii}$_dorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n + m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_dgelqf call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorglq call stdlib${ii}$_dorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr q call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_dgelqf maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_dgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_p ) end if if( wntva ) then call stdlib${ii}$_dorgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m + n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_dlacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n + 4*n-1, prefer n*n + 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_dlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n + n + m, prefer 2*n*n + n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_dlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n + ncu, prefer 3*n + ncu*nb) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_dlacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_dlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m + m + n, prefer 2*m*m + m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_dlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m + nrvt, prefer 3*m + nrvt*nb) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_dbdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_dgesvd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) !! DGESVD: computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_qgeqrf, lwork_qorgqr_n, lwork_qorgqr_m, lwork_qgebrd, & lwork_qorgbr_p, lwork_qorgbr_q, lwork_qgelqf, lwork_qorglq_n, lwork_qorglq_m real(${rk}$) :: anrm, bignum, eps, smlnum ! Local Arrays real(${rk}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_${ri}$geqrf call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgqr call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr p call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr q call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_qgeqrf maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_qgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_q ) end if if( wntua ) then call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n + m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_${ri}$gelqf call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orglq call stdlib${ii}$_${ri}$orglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr p call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr q call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_qgelqf maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_qgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_${ri}$orgbr p call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_p ) end if if( wntva ) then call stdlib${ii}$_${ri}$orgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m + n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${ri}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${ri}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_${ri}$lacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n + 4*n-1, prefer n*n + 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_${ri}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n + n + m, prefer 2*n*n + n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_${ri}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n + ncu, prefer 3*n + ncu*nb) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_${ri}$orgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_${ri}$lacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_${ri}$lacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m + m + n, prefer 2*m*m + m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_${ri}$lacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m + nrvt, prefer 3*m + nrvt*nb) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_${ri}$orgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_${ri}$bdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_${ri}$gesvd #:endif #:endfor module subroutine stdlib${ii}$_cgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, & !! CGESVD computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**H, not V. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(sp), intent(out) :: rwork(*), s(*) complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_cgeqrf, lwork_cungqr_n, lwork_cungqr_m, lwork_cgebrd, & lwork_cungbr_p, lwork_cungbr_q, lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays real(sp) :: dum(1_${ik}$) complex(sp) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_zbdsqr is bdspac = 5*n mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_cgeqrf call stdlib${ii}$_cgeqrf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgeqrf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungqr call stdlib${ii}$_cungqr( m, n, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungqr_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungqr( m, m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungqr_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( n, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'Q', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_cgeqrf maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_p ) minwrk = 3_${ik}$*n else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_cgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*n + lwork_cgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_q ) end if if( wntua ) then call stdlib${ii}$_cungbr( 'Q', m, m, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_p ) end if minwrk = 2_${ik}$*n + m end if else if( minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_cbdsqr is bdspac = 5*m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_cgelqf call stdlib${ii}$_cgelqf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgelqf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cunglq call stdlib${ii}$_cunglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$,ierr ) lwork_cunglq_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunglq( m, n, m, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cunglq_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( m, m, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungbr p call stdlib${ii}$_cungbr( 'P', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungbr q call stdlib${ii}$_cungbr( 'Q', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_cgelqf maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_q ) minwrk = 3_${ik}$*m else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_cgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*m + lwork_cgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_cungbr p call stdlib${ii}$_cungbr( 'P', m, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_p ) end if if( wntva ) then call stdlib${ii}$_cungbr( 'P', n, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_q ) end if minwrk = 2_${ik}$*m + n end if end if maxwrk = max( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if irwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_clacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: n) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (cworkspace: need 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, cdum,& 1_${ik}$, rwork( irwork ),info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) ! copy right singular vectors of r to a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_clacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: need 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if irwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_clacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_cgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, u, & ldu, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n)) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_cgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), a, lda,u, ldu, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) ! copy left singular vectors of l to a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_clacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: m) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_cgesvd module subroutine stdlib${ii}$_zgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, rwork, & !! ZGESVD computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**H, not V. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(dp), intent(out) :: rwork(*), s(*) complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_zgeqrf, lwork_zungqr_n, lwork_zungqr_m, lwork_zgebrd, & lwork_zungbr_p, lwork_zungbr_q, lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays real(dp) :: dum(1_${ik}$) complex(dp) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_zbdsqr is bdspac = 5*n mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_zgeqrf call stdlib${ii}$_zgeqrf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgeqrf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungqr call stdlib${ii}$_zungqr( m, n, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungqr_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungqr( m, m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungqr_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( n, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'Q', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_zgeqrf maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_p ) minwrk = 3_${ik}$*n else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_zgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*n + lwork_zgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_q ) end if if( wntua ) then call stdlib${ii}$_zungbr( 'Q', m, m, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_p ) end if minwrk = 2_${ik}$*n + m end if else if( minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_zbdsqr is bdspac = 5*m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_zgelqf call stdlib${ii}$_zgelqf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgelqf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zunglq call stdlib${ii}$_zunglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$,ierr ) lwork_zunglq_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunglq( m, n, m, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zunglq_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( m, m, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungbr p call stdlib${ii}$_zungbr( 'P', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungbr q call stdlib${ii}$_zungbr( 'Q', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_zgelqf maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_q ) minwrk = 3_${ik}$*m else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_zgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*m + lwork_zgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_zungbr p call stdlib${ii}$_zungbr( 'P', m, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_p ) end if if( wntva ) then call stdlib${ii}$_zungbr( 'P', n, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_q ) end if minwrk = 2_${ik}$*m + n end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if irwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_zlacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: n) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (cworkspace: need 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, cdum,& 1_${ik}$, rwork( irwork ),info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) ! copy right singular vectors of r to a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_zlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: need 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if irwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_zlacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, u, & ldu, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n)) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), a, lda,u, ldu, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) ! copy left singular vectors of l to a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_zlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: m) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_zgesvd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, rwork, & !! ZGESVD: computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**H, not V. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(${ck}$), intent(out) :: rwork(*), s(*) complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_wgeqrf, lwork_wungqr_n, lwork_wungqr_m, lwork_wgebrd, & lwork_wungbr_p, lwork_wungbr_q, lwork_wgelqf, lwork_wunglq_n, lwork_wunglq_m real(${ck}$) :: anrm, bignum, eps, smlnum ! Local Arrays real(${ck}$) :: dum(1_${ik}$) complex(${ck}$) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_${ci}$bdsqr is bdspac = 5*n mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_${ci}$geqrf call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgeqrf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungqr call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungqr_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungqr( m, m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungqr_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_wgeqrf maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_p ) minwrk = 3_${ik}$*n else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*n + lwork_wgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_q ) end if if( wntua ) then call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_p ) end if minwrk = 2_${ik}$*n + m end if else if( minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_${ci}$bdsqr is bdspac = 5*m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_${ci}$gelqf call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgelqf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$unglq call stdlib${ii}$_${ci}$unglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$,ierr ) lwork_wunglq_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wunglq_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr p call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr q call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_wgelqf maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_q ) minwrk = 3_${ik}$*m else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*m + lwork_wgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_${ci}$ungbr p call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_p ) end if if( wntva ) then call stdlib${ii}$_${ci}$ungbr( 'P', n, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_q ) end if minwrk = 2_${ik}$*m + n end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if irwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_${ci}$lacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: n) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (cworkspace: need 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, cdum,& 1_${ik}$, rwork( irwork ),info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) ! copy right singular vectors of r to a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_${ci}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagona