#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_drivers implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, info ) !! SGESVD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m, lwork_sgebrd, & lwork_sorgbr_p, lwork_sorgbr_q, lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_sgeqrf call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgqr call stdlib${ii}$_sorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr q call stdlib${ii}$_sorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_sgeqrf maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n+wrkbl, n*n+m*n+n ) minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n+wrkbl, n*n+m*n+n ) minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_sgeqrf wrkbl = max( wrkbl, n+lwork_sorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n+m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_sgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q ) end if if( wntua ) then call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n+m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_sgelqf call stdlib${ii}$_sgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorglq call stdlib${ii}$_sorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sgebrd call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_sorgbr q call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_sgelqf maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m+wrkbl, m*m+m*n+m ) minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m+wrkbl, m*m+m*n+m ) minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) maxwrk = max( maxwrk, minwrk ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_sgelqf wrkbl = max( wrkbl, m+lwork_sorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m+n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_sgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_sorgbr p call stdlib${ii}$_sorgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p ) end if if( wntva ) then call stdlib${ii}$_sorgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m+n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_slacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n+2*n, prefer n*n+m*n+n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n+4*n-1, prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n+2*n, prefer n*n+m*n+n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n+4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n+4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n+4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n+4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n+4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n+2*n, prefer n*n+n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n+n+m, prefer n*n+n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n+4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n+bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n+n*nb) call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n+m, prefer n+m*nb) call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n+2*n*nb) call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n+m, prefer 3*n+m*nb) call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n+ncu, prefer 3*n+ncu*nb) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n+n*nb) call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb) call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_slacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m+2*m, prefer m*m+m*n+m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m+2*m, prefer m*m+m*n+m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m+4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m+4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m+4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m+4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m+4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m+4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m+2*m, prefer m*m+m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m+m+n, prefer m*m+m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m+bdspac) call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m+m*nb) call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m+n, prefer m+n*nb) call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m+2*m*nb) call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m+n, prefer 3*m+n*nb) call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb) call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m+nrvt, prefer 3*m+nrvt*nb) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb) call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m+m*nb) call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_sbdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_sgesvd module subroutine stdlib${ii}$_dgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) !! DGESVD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_dgeqrf, lwork_dorgqr_n, lwork_dorgqr_m, lwork_dgebrd, & lwork_dorgbr_p, lwork_dorgbr_q, lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_dgeqrf call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgqr call stdlib${ii}$_dorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr q call stdlib${ii}$_dorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_dgeqrf maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_dgeqrf wrkbl = max( wrkbl, n + lwork_dorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_dgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q ) end if if( wntua ) then call stdlib${ii}$_dorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n + m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_dgelqf call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorglq call stdlib${ii}$_dorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dgebrd call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_dorgbr q call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_dgelqf maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_dgelqf wrkbl = max( wrkbl, m + lwork_dorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_dgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_dorgbr p call stdlib${ii}$_dorgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_p ) end if if( wntva ) then call stdlib${ii}$_dorgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m + lwork_dorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m + n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_dlacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n + 4*n-1, prefer n*n + 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_dlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n + n + m, prefer 2*n*n + n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_dlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_dgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n + ncu, prefer 3*n + ncu*nb) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_dlacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_dlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m + m + n, prefer 2*m*m + m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_dlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_dbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_dgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_dormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m + nrvt, prefer 3*m + nrvt*nb) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_dorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_dbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_dbdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_dgesvd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info ) !! DGESVD: computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**T, not V. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_qgeqrf, lwork_qorgqr_n, lwork_qorgqr_m, lwork_qgebrd, & lwork_qorgbr_p, lwork_qorgbr_q, lwork_qgelqf, lwork_qorglq_n, lwork_qorglq_m real(${rk}$) :: anrm, bignum, eps, smlnum ! Local Arrays real(${rk}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*n ! compute space needed for stdlib${ii}$_${ri}$geqrf call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgeqrf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgqr call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr p call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr q call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_qgeqrf maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_p ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*n, bdspac ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( n*n + wrkbl, n*n + m*n + n ) minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_n ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_qgeqrf wrkbl = max( wrkbl, n + lwork_qorgqr_m ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_q ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = n*n + wrkbl minwrk = max( 3_${ik}$*n + m, bdspac ) end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*n + lwork_qgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_q ) end if if( wntua ) then call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 3_${ik}$*n + lwork_qorgbr_p ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*n + m, bdspac ) end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsqr mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) bdspac = 5_${ik}$*m ! compute space needed for stdlib${ii}$_${ri}$gelqf call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgelqf = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orglq call stdlib${ii}$_${ri}$orglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_n = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_m = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$gebrd call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr p call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ri}$orgbr q call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_q = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_qgelqf maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_q ) maxwrk = max( maxwrk, bdspac ) minwrk = max( 4_${ik}$*m, bdspac ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = max( m*m + wrkbl, m*m + m*n + m ) minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_m ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_qgelqf wrkbl = max( wrkbl, m + lwork_qorglq_n ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_p ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qorgbr_q ) wrkbl = max( wrkbl, bdspac ) maxwrk = m*m + wrkbl minwrk = max( 3_${ik}$*m + n, bdspac ) end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = 3_${ik}$*m + lwork_qgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_${ri}$orgbr p call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_p ) end if if( wntva ) then call stdlib${ii}$_${ri}$orgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p = int( dum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 3_${ik}$*m + lwork_qorgbr_q ) end if maxwrk = max( maxwrk, bdspac ) minwrk = max( 3_${ik}$*m + n, bdspac ) end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${ri}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${ri}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if iwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_${ri}$lacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + n ) + lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + n ) + n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n-n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need n*n + 4*n-1, prefer n*n + 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) iu = ie + n ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (workspace: need n*n + 2*n, prefer n*n + m*n + n) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )& , ldwrkr, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, & 1_${ik}$, work( iwork ), info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) ! copy right singular vectors of r to a ! (workspace: need n*n) call stdlib${ii}$_${ri}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ! generate q in a ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, & zero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$orgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), & ldwrkr, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda + n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n*n + 2*n, prefer 2*n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need 2*n*n + n + m, prefer 2*n*n + n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (workspace: need 2*n*n + 4*n, ! prefer 2*n*n+3*n+2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need 2*n*n + 4*n, prefer 2*n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need 2*n*n + 4*n-1, ! prefer 2*n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (workspace: need 2*n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_${ri}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, & 1_${ik}$, work( iwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need n*n + 2*n, prefer n*n + n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n*n + n + m, prefer n*n + n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (workspace: need n*n + 4*n, prefer n*n + 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (workspace: need n*n + 4*n, prefer n*n + 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need n*n + 4*n-1, ! prefer n*n+3*n+(n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (workspace: need n*n + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),& ldwrku, dum, 1_${ik}$,work( iwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (workspace: need n*n) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, & zero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (workspace: need 2*n, prefer n + n*nb) call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (workspace: need n + m, prefer n + m*nb) call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt & ) ie = itau itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (workspace: need 4*n, prefer 3*n + 2*n*nb) call stdlib${ii}$_${ri}$gebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (workspace: need 3*n + m, prefer 3*n + m*nb) call stdlib${ii}$_${ri}$ormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) iwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (workspace: need 3*n + m, prefer 3*n + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 3*n + ncu, prefer 3*n + ncu*nb) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_${ri}$orgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*n, prefer 3*n + n*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*n-1, prefer 3*n + (n-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if iwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, & work( iwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_${ri}$lacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,& 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n + m ) + lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n + m ) + m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m-m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (workspace: need m*m + 4*m-1, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, & ldu, dum, 1_${ik}$,work( iwork ), info ) iu = ie + m ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (workspace: need m*m + 2*m, prefer m*m + m*n + m)) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i & ), lda, zero,work( iu ), ldwrku ) call stdlib${ii}$_${ri}$lacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup & ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, & work( iwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) ! copy left singular vectors of l to a ! (workspace: need m*m) call stdlib${ii}$_${ri}$lacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, compute left ! singular vectors of a in a and compute right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ! generate q in a ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, & zero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$orglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr & ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (workspace: need m*m + 4*m-1, ! prefer m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, & dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda + m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m*m + 2*m, prefer 2*m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need 2*m*m + m + n, prefer 2*m*m + m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (workspace: need 2*m*m + 4*m, ! prefer 2*m*m+3*m+2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need 2*m*m + 4*m-1, ! prefer 2*m*m+3*m+(m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (workspace: need 2*m*m + 4*m, prefer 2*m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (workspace: need 2*m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_${ri}$lacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda ) ! bidiagonalize l in a ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, & dum, 1_${ik}$, work( iwork ),info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n + m, 4_${ik}$*m, bdspac ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need m*m + 2*m, prefer m*m + m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m*m + m + n, prefer m*m + m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku & ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq & ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (workspace: need m*m + 4*m, prefer m*m + 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need m*m + 4*m, prefer m*m + 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (workspace: need m*m + bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, & u, ldu, dum, 1_${ik}$,work( iwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (workspace: need m*m) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, & zero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (workspace: need 2*m, prefer m + m*nb) call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (workspace: need m + n, prefer m + n*nb) call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ie = itau itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (workspace: need 4*m, prefer 3*m + 2*m*nb) call stdlib${ii}$_${ri}$gebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (workspace: need 3*m + n, prefer 3*m + n*nb) call stdlib${ii}$_${ri}$ormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) iwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, & dum, 1_${ik}$, work( iwork ),info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (workspace: need 3*m + n, prefer 3*m + (m + n)*nb) call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (workspace: need 3*m + nrvt, prefer 3*m + nrvt*nb) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_${ri}$orgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (workspace: need 4*m-1, prefer 3*m + (m-1)*nb) call stdlib${ii}$_${ri}$orgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (workspace: need 4*m, prefer 3*m + m*nb) call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if iwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,& 1_${ik}$, work( iwork ), info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, & 1_${ik}$, work( iwork ), info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (workspace: need bdspac) call stdlib${ii}$_${ri}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,& 1_${ik}$, work( iwork ), info ) end if end if end if ! if stdlib${ii}$_${ri}$bdsqr failed to converge, copy unconverged superdiagonals ! to work( 2:minmn ) if( info/=0_${ik}$ ) then if( ie>2_${ik}$ ) then do i = 1, minmn - 1 work( i+1 ) = work( i+ie-1 ) end do end if if( ie<2_${ik}$ ) then do i = minmn - 1, 1, -1 work( i+1 ) = work( i+ie-1 ) end do end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$, work( 2_${ik}$ ),minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_${ri}$gesvd #:endif #:endfor module subroutine stdlib${ii}$_cgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, & !! CGESVD computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**H, not V. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(sp), intent(out) :: rwork(*), s(*) complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_cgeqrf, lwork_cungqr_n, lwork_cungqr_m, lwork_cgebrd, & lwork_cungbr_p, lwork_cungbr_q, lwork_cgelqf, lwork_cunglq_n, lwork_cunglq_m real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays real(sp) :: dum(1_${ik}$) complex(sp) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_zbdsqr is bdspac = 5*n mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_cgeqrf call stdlib${ii}$_cgeqrf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgeqrf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungqr call stdlib${ii}$_cungqr( m, n, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungqr_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungqr( m, m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungqr_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( n, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungbr call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'Q', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_cgeqrf maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_p ) minwrk = 3_${ik}$*n else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_cgeqrf wrkbl = max( wrkbl, n+lwork_cungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_cungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_cgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*n + lwork_cgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_q ) end if if( wntua ) then call stdlib${ii}$_cungbr( 'Q', m, m, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 2_${ik}$*n+lwork_cungbr_p ) end if minwrk = 2_${ik}$*n + m end if else if( minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_cbdsqr is bdspac = 5*m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'CGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_cgelqf call stdlib${ii}$_cgelqf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgelqf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cunglq call stdlib${ii}$_cunglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$,ierr ) lwork_cunglq_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunglq( m, n, m, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cunglq_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cgebrd call stdlib${ii}$_cgebrd( m, m, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungbr p call stdlib${ii}$_cungbr( 'P', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_cungbr q call stdlib${ii}$_cungbr( 'Q', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_cgelqf maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_q ) minwrk = 3_${ik}$*m else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_cgelqf wrkbl = max( wrkbl, m+lwork_cunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_cungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_cgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_cgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*m + lwork_cgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_cungbr p call stdlib${ii}$_cungbr( 'P', m, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_p ) end if if( wntva ) then call stdlib${ii}$_cungbr( 'P', n, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 2_${ik}$*m+lwork_cungbr_q ) end if minwrk = 2_${ik}$*m + n end if end if maxwrk = max( minwrk, maxwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if irwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_clacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: n) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (cworkspace: need 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, cdum,& 1_${ik}$, rwork( irwork ),info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) ! copy right singular vectors of r to a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_clacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: need 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if irwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_clacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_cgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, u, & ldu, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n)) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_cgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_clacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), a, lda,u, ldu, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) ! copy left singular vectors of l to a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_clacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_clacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_cgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: m) call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) ! (rworkspace: 0) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_cbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_cgesvd module subroutine stdlib${ii}$_zgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, rwork, & !! ZGESVD computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**H, not V. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(dp), intent(out) :: rwork(*), s(*) complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_zgeqrf, lwork_zungqr_n, lwork_zungqr_m, lwork_zgebrd, & lwork_zungbr_p, lwork_zungbr_q, lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays real(dp) :: dum(1_${ik}$) complex(dp) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_zbdsqr is bdspac = 5*n mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_zgeqrf call stdlib${ii}$_zgeqrf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgeqrf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungqr call stdlib${ii}$_zungqr( m, n, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungqr_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungqr( m, m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungqr_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( n, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungbr call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'Q', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_zgeqrf maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_p ) minwrk = 3_${ik}$*n else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_zgeqrf wrkbl = max( wrkbl, n+lwork_zungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_zungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_zgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*n + lwork_zgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_q ) end if if( wntua ) then call stdlib${ii}$_zungbr( 'Q', m, m, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 2_${ik}$*n+lwork_zungbr_p ) end if minwrk = 2_${ik}$*n + m end if else if( minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_zbdsqr is bdspac = 5*m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_zgelqf call stdlib${ii}$_zgelqf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgelqf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zunglq call stdlib${ii}$_zunglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$,ierr ) lwork_zunglq_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunglq( m, n, m, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zunglq_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zgebrd call stdlib${ii}$_zgebrd( m, m, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungbr p call stdlib${ii}$_zungbr( 'P', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_zungbr q call stdlib${ii}$_zungbr( 'Q', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_zgelqf maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_q ) minwrk = 3_${ik}$*m else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_zgelqf wrkbl = max( wrkbl, m+lwork_zunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_zungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_zgebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_zgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*m + lwork_zgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_zungbr p call stdlib${ii}$_zungbr( 'P', m, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_p ) end if if( wntva ) then call stdlib${ii}$_zungbr( 'P', n, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 2_${ik}$*m+lwork_zungbr_q ) end if minwrk = 2_${ik}$*m + n end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if irwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_zlacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: n) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (cworkspace: need 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, cdum,& 1_${ik}$, rwork( irwork ),info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) ! copy right singular vectors of r to a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_zlacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: need 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if irwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_zlacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, u, & ldu, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n)) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zgemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), a, lda,u, ldu, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) ! copy left singular vectors of l to a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_zlacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_zgebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: m) call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) ! (rworkspace: 0) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_zbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_zgesvd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, rwork, & !! ZGESVD: computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns V**H, not V. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu, jobvt integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments real(${ck}$), intent(out) :: rwork(*), s(*) complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,& wntvs integer(${ik}$) :: blk, chunk, i, ie, ierr, ir, irwork, iscl, itau, itaup, itauq, iu, & iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, & wrkbl integer(${ik}$) :: lwork_wgeqrf, lwork_wungqr_n, lwork_wungqr_m, lwork_wgebrd, & lwork_wungbr_p, lwork_wungbr_q, lwork_wgelqf, lwork_wunglq_n, lwork_wunglq_m real(${ck}$) :: anrm, bignum, eps, smlnum ! Local Arrays real(${ck}$) :: dum(1_${ik}$) complex(${ck}$) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntua = stdlib_lsame( jobu, 'A' ) wntus = stdlib_lsame( jobu, 'S' ) wntuas = wntua .or. wntus wntuo = stdlib_lsame( jobu, 'O' ) wntun = stdlib_lsame( jobu, 'N' ) wntva = stdlib_lsame( jobvt, 'A' ) wntvs = stdlib_lsame( jobvt, 'S' ) wntvas = wntva .or. wntvs wntvo = stdlib_lsame( jobvt, 'O' ) wntvn = stdlib_lsame( jobvt, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then info = -1_${ik}$ else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) & then info = -2_${ik}$ else if( m<0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then info = -9_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) & then info = -11_${ik}$ end if ! compute workspace ! (note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace needed at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_${ci}$bdsqr is bdspac = 5*n mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_${ci}$geqrf call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgeqrf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungqr call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungqr_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungqr( m, m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungqr_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') maxwrk = n + lwork_wgeqrf maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wgebrd ) if( wntvo .or. wntvas )maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_p ) minwrk = 3_${ik}$*n else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or ! 'a') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = max( n*n+wrkbl, n*n+m*n ) minwrk = 2_${ik}$*n + m else if( wntus .and. wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntus .and. wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' or ! 'a') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_n ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = 2_${ik}$*n*n + wrkbl minwrk = 2_${ik}$*n + m else if( wntua .and. wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' or ! 'a') wrkbl = n + lwork_wgeqrf wrkbl = max( wrkbl, n+lwork_wungqr_m ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_q ) wrkbl = max( wrkbl, 2_${ik}$*n+lwork_wungbr_p ) maxwrk = n*n + wrkbl minwrk = 2_${ik}$*n + m end if else ! path 10 (m at least n, but not much larger) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*n + lwork_wgebrd if( wntus .or. wntuo ) then call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_q ) end if if( wntua ) then call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, a, lda, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_q ) end if if( .not.wntvn ) then maxwrk = max( maxwrk, 2_${ik}$*n+lwork_wungbr_p ) end if minwrk = 2_${ik}$*n + m end if else if( minmn>0_${ik}$ ) then ! space needed for stdlib${ii}$_${ci}$bdsqr is bdspac = 5*m mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'ZGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ ) ! compute space needed for stdlib${ii}$_${ci}$gelqf call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgelqf = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$unglq call stdlib${ii}$_${ci}$unglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$,ierr ) lwork_wunglq_n = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wunglq_m = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$gebrd call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr p call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) ! compute space needed for stdlib${ii}$_${ci}$ungbr q call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_q = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') maxwrk = m + lwork_wgelqf maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wgebrd ) if( wntuo .or. wntuas )maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_q ) minwrk = 3_${ik}$*m else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', ! jobvt='o') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = max( m*m+wrkbl, m*m+m*n ) minwrk = 2_${ik}$*m + n else if( wntvs .and. wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntvs .and. wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_m ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = 2_${ik}$*m*m + wrkbl minwrk = 2_${ik}$*m + n else if( wntva .and. wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') wrkbl = m + lwork_wgelqf wrkbl = max( wrkbl, m+lwork_wunglq_n ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wgebrd ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_p ) wrkbl = max( wrkbl, 2_${ik}$*m+lwork_wungbr_q ) maxwrk = m*m + wrkbl minwrk = 2_${ik}$*m + n end if else ! path 10t(n greater than m, but not much larger) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, & ierr ) lwork_wgebrd = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = 2_${ik}$*m + lwork_wgebrd if( wntvs .or. wntvo ) then ! compute space needed for stdlib${ii}$_${ci}$ungbr p call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_p ) end if if( wntva ) then call stdlib${ii}$_${ci}$ungbr( 'P', n, n, m, a, n, cdum(1_${ik}$),cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wungbr_p = int( cdum(1_${ik}$),KIND=${ik}$) maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_p ) end if if( .not.wntun ) then maxwrk = max( maxwrk, 2_${ik}$*m+lwork_wungbr_q ) end if minwrk = 2_${ik}$*m + n end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = maxwrk if( lwork<minwrk .and. .not.lquery ) then info = -13_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, dum ) iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntun ) then ! path 1 (m much larger than n, jobu='n') ! no left singular vectors to be computed itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out below r if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) end if ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) ncvt = 0_${ik}$ if( wntvo .or. wntvas ) then ! if right singular vectors desired, generate p'. ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) ncvt = n end if irwork = ie + n ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if right singular vectors desired in vt, copy them there if( wntvas )call stdlib${ii}$_${ci}$lacpy( 'F', n, n, a, lda, vt, ldvt ) else if( wntuo .and. wntvn ) then ! path 2 (m much larger than n, jobu='o', jobvt='n') ! n left singular vectors to be overwritten on a and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n, work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n, work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n, work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to work(ir) and zero out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: need 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: n) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a ! (cworkspace: need 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntuo .and. wntvas ) then ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+n*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ldwrkr = n else ! work(iu) is ldwrku by n and work(ir) is n by n ldwrku = ( lwork-n*n ) / n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt, copying result to work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( & iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need n*n+3*n-1, prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) and computing right ! singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( ir ), & ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply q in a by left singular vectors of r in ! work(ir), storing result in work(iu) and copying to a ! (cworkspace: need n*n+n, prefer n*n+m*n) ! (rworkspace: 0) do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( ir & ), ldwrkr, czero,work( iu ), ldwrku ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), ldvt ) ! generate q in a ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in a by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,& work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, cdum,& 1_${ik}$, rwork( irwork ),info ) end if else if( wntus ) then if( wntvn ) then ! path 4 (m much larger than n, jobu='s', jobvt='n') ! n left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda,work( ir ), ldwrkr, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 5 (m much larger than n, jobu='s', jobvt='o') ! n left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) ! copy right singular vectors of r to a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left vectors bidiagonalizing r ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing r in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 6 (m much larger than n, jobu='s', jobvt='s' ! or 'a') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+3*n ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ! generate q in a ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in u ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda,work( iu ), ldwrku, & czero, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntua ) then if( wntvn ) then ! path 7 (m much larger than n, jobu='a', jobvt='n') ! m left singular vectors to be computed in u and ! no right singular vectors to be computed if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(ir) is lda by n ldwrkr = lda else ! work(ir) is n by n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( ir+1 ), ldwrkr ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(ir) ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, work( ir ),& ldwrkr, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(ir), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, u, ldu,work( ir ), ldwrkr, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, rwork( ie ), cdum,1_${ik}$, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvo ) then ! path 8 (m much larger than n, jobu='a', jobvt='o') ! m left singular vectors to be computed in u and ! n right singular vectors to be overwritten on a if( lwork>=2_${ik}$*n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*n ) then ! work(iu) is lda by n and work(ir) is lda by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = lda else if( lwork>=wrkbl+( lda+n )*n ) then ! work(iu) is lda by n and work(ir) is n by n ldwrku = lda ir = iu + ldwrku*n ldwrkr = n else ! work(iu) is n by n and work(ir) is n by n ldwrku = n ir = iu + ldwrku*n ldwrkr = n end if itau = ir + ldwrkr*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*n*n+3*n, ! prefer 2*n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*n*n+3*n, prefer 2*n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*n*n+3*n-1, ! prefer 2*n*n+2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in work(ir) ! (cworkspace: need 2*n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & work( iu ),ldwrku, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) ! copy right singular vectors of r from work(ir) to a call stdlib${ii}$_${ci}$lacpy( 'F', n, n, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! zero out below r in a if( n > 1_${ik}$ ) then call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,a( 2_${ik}$, 1_${ik}$ ), lda ) end if ! bidiagonalize r in a ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in a ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) end if else if( wntvas ) then ! path 9 (m much larger than n, jobu='a', jobvt='s' ! or 'a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt if( lwork>=n*n+max( n+m, 3_${ik}$*n ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*n ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is n by n ldwrku = n end if itau = iu + ldwrku*n iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need n*n+2*n, prefer n*n+n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n*n+n+m, prefer n*n+n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,work( iu+1 ), ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in work(iu), copying result to vt ! (cworkspace: need n*n+3*n, prefer n*n+2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt ) ! generate left bidiagonalizing vectors in work(iu) ! (cworkspace: need n*n+3*n, prefer n*n+2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need n*n+3*n-1, ! prefer n*n+2*n+(n-1)*nb) ! (rworkspace: need 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of r in work(iu) and computing ! right singular vectors of r in vt ! (cworkspace: need n*n) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, n, 0_${ik}$, s, rwork( ie ), vt,ldvt, work( iu )& , ldwrku, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! (cworkspace: need n*n) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, u, ldu,work( iu ), ldwrku, & czero, a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + n ! compute a=q*r, copying result to u ! (cworkspace: need 2*n, prefer n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! (cworkspace: need n+m, prefer n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy r from a to vt, zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) if( n>1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero,vt( 2_${ik}$, 1_${ik}$ ), & ldvt ) ie = 1_${ik}$ itauq = itau itaup = itauq + n iwork = itaup + n ! bidiagonalize r in vt ! (cworkspace: need 3*n, prefer 2*n+2*n*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( n, n, vt, ldvt, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply q in u by left bidiagonalizing vectors ! in vt ! (cworkspace: need 2*n+m, prefer 2*n+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, & ldu, work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),& lwork-iwork+1, ierr ) irwork = ie + n ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! m < mnthr ! path 10 (m at least n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n iwork = itaup + n ! bidiagonalize a ! (cworkspace: need 2*n+m, prefer 2*n+(m+n)*nb) ! (rworkspace: need n) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 2*n+ncu, prefer 2*n+ncu*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) if( wntus )ncu = n if( wntua )ncu = m call stdlib${ii}$_${ci}$ungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*n, prefer 2*n+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*n-1, prefer 2*n+(n-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + n if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntvn ) then ! path 1t(n much larger than m, jobvt='n') ! no right singular vectors to be computed itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( iwork ), lwork-iwork+1,ierr ) if( wntuo .or. wntuas ) then ! if left singular vectors desired, generate q ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) end if irwork = ie + m nru = 0_${ik}$ if( wntuo .or. wntuas )nru = m ! perform bidiagonal qr iteration, computing left singular ! vectors of a in a if desired ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, rwork( ie ), cdum, 1_${ik}$,a, lda, cdum, & 1_${ik}$, rwork( irwork ), info ) ! if left singular vectors desired in u, copy them there if( wntuas )call stdlib${ii}$_${ci}$lacpy( 'F', m, m, a, lda, u, ldu ) else if( wntvo .and. wntun ) then ! path 2t(n much larger than m, jobu='n', jobvt='o') ! m right singular vectors to be overwritten on a and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to work(ir) and zero out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ),& work( itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_${ci}$lacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), a, lda,cdum, 1_${ik}$, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvo .and. wntuas ) then ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=max( wrkbl, lda*n )+lda*m ) then ! work(iu) is lda by n and work(ir) is lda by m ldwrku = lda chunk = n ldwrkr = lda else if( lwork>=max( wrkbl, lda*n )+m*m ) then ! work(iu) is lda by n and work(ir) is m by m ldwrku = lda chunk = n ldwrkr = m else ! work(iu) is m by chunk and work(ir) is m by m ldwrku = m chunk = ( lwork-m*m ) / m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing about above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u, copying result to work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr ) ! generate right vectors bidiagonalizing l in work(ir) ! (cworkspace: need m*m+3*m-1, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( & iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u, and computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, u, & ldu, cdum, 1_${ik}$,rwork( irwork ), info ) iu = itauq ! multiply right singular vectors of l in work(ir) by q ! in a, storing result in work(iu) and copying to a ! (cworkspace: need m*m+m, prefer m*m+m*n)) ! (rworkspace: 0) do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, blk, m, cone, work( ir ),ldwrkr, a( 1_${ik}$, & i ), lda, czero,work( iu ), ldwrku ) call stdlib${ii}$_${ci}$lacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda ) end do else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+& 1_${ik}$, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, u( 1_${ik}$, 2_${ik}$ ),ldu ) ! generate q in a ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in a ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), a, lda, & work( iwork ),lwork-iwork+1, ierr ) ! generate left vectors bidiagonalizing l in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), a, lda,u, ldu, cdum, & 1_${ik}$, rwork( irwork ), info ) end if else if( wntvs ) then if( wntun ) then ! path 4t(n much larger than m, jobu='n', jobvt='s') ! m right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right vectors bidiagonalizing l in ! work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy result to vt call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 5t(n much larger than m, jobu='o', jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) ! copy left singular vectors of l to a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right vectors bidiagonalizing l by q in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors of l in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 6t(n much larger than m, jobu='s' or 'a', ! jobvt='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+3*m ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by n ldwrku = lda else ! work(iu) is lda by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ! generate q in a ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in a, storing result in vt ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, a, lda, & czero, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if else if( wntva ) then if( wntun ) then ! path 7t(n much larger than m, jobu='n', jobvt='a') ! n right singular vectors to be computed in vt and ! no left singular vectors to be computed if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm ir = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(ir) is lda by m ldwrkr = lda else ! work(ir) is m by m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! copy l to work(ir), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( ir ),ldwrkr ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( ir+ldwrkr ), & ldwrkr ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(ir) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( ir ), ldwrkr, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) ! generate right bidiagonalizing vectors in work(ir) ! (cworkspace: need m*m+3*m-1, ! prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of l in work(ir) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, rwork( ie ),work( ir ), ldwrkr, & cdum, 1_${ik}$, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(ir) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( ir ),ldwrkr, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, rwork( ie ), vt,ldvt, cdum, 1_${ik}$, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuo ) then ! path 8t(n much larger than m, jobu='o', jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be overwritten on a if( lwork>=2_${ik}$*m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+2*lda*m ) then ! work(iu) is lda by m and work(ir) is lda by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = lda else if( lwork>=wrkbl+( lda+m )*m ) then ! work(iu) is lda by m and work(ir) is m by m ldwrku = lda ir = iu + ldwrku*m ldwrkr = m else ! work(iu) is m by m and work(ir) is m by m ldwrku = m ir = iu + ldwrku*m ldwrkr = m end if itau = ir + ldwrkr*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to ! work(ir) ! (cworkspace: need 2*m*m+3*m, ! prefer 2*m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need 2*m*m+3*m-1, ! prefer 2*m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in work(ir) ! (cworkspace: need 2*m*m+3*m, prefer 2*m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), & work( iwork ),lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in work(ir) and computing ! right singular vectors of l in work(iu) ! (cworkspace: need 2*m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & work( ir ),ldwrkr, cdum, 1_${ik}$, rwork( irwork ),info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, vt, ldvt ) ! copy left singular vectors of a from work(ir) to a call stdlib${ii}$_${ci}$lacpy( 'F', m, m, work( ir ), ldwrkr, a,lda ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! zero out above l in a if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,a( 1_${ik}$, 2_${ik}$ ), lda ) ! bidiagonalize l in a ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in a by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, a, lda,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in a and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$,rwork( irwork ), info ) end if else if( wntuas ) then ! path 9t(n much larger than m, jobu='s' or 'a', ! jobvt='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u if( lwork>=m*m+max( n+m, 3_${ik}$*m ) ) then ! sufficient workspace for a fast algorithm iu = 1_${ik}$ if( lwork>=wrkbl+lda*m ) then ! work(iu) is lda by m ldwrku = lda else ! work(iu) is m by m ldwrku = m end if itau = iu + ldwrku*m iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need m*m+2*m, prefer m*m+m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m*m+m+n, prefer m*m+m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to work(iu), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( iu+ldwrku ), & ldwrku ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in work(iu), copying result to u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, work( iu ), ldwrku, s,rwork( ie ), work( & itauq ),work( itaup ), work( iwork ),lwork-iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, m, work( iu ), ldwrku, u,ldu ) ! generate right bidiagonalizing vectors in work(iu) ! (cworkspace: need m*m+3*m, prefer m*m+2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), & work( iwork ),lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need m*m+3*m, prefer m*m+2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of l in u and computing right ! singular vectors of l in work(iu) ! (cworkspace: need m*m) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, m, m, 0_${ik}$, s, rwork( ie ),work( iu ), ldwrku, & u, ldu, cdum, 1_${ik}$,rwork( irwork ), info ) ! multiply right singular vectors of l in work(iu) by ! q in vt, storing result in a ! (cworkspace: need m*m) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( iu ),ldwrku, vt, ldvt,& czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! insufficient workspace for a fast algorithm itau = 1_${ik}$ iwork = itau + m ! compute a=l*q, copying result to vt ! (cworkspace: need 2*m, prefer m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-& iwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! (cworkspace: need m+n, prefer m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), & lwork-iwork+1, ierr ) ! copy l to u, zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,u( 1_${ik}$, 2_${ik}$ ), ldu ) ie = 1_${ik}$ itauq = itau itaup = itauq + m iwork = itaup + m ! bidiagonalize l in u ! (cworkspace: need 3*m, prefer 2*m+2*m*nb) ! (rworkspace: need m) call stdlib${ii}$_${ci}$gebrd( m, m, u, ldu, s, rwork( ie ),work( itauq ), work( & itaup ),work( iwork ), lwork-iwork+1, ierr ) ! multiply right bidiagonalizing vectors in u by q ! in vt ! (cworkspace: need 2*m+n, prefer 2*m+n*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$unmbr( 'P', 'L', 'C', m, n, m, u, ldu,work( itaup ), vt, & ldvt,work( iwork ), lwork-iwork+1, ierr ) ! generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), & lwork-iwork+1, ierr ) irwork = ie + m ! perform bidiagonal qr iteration, computing left ! singular vectors of a in u and computing right ! singular vectors of a in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'U', m, n, m, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$,rwork( irwork ), info ) end if end if end if else ! n < mnthr ! path 10t(n greater than m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m iwork = itaup + m ! bidiagonalize a ! (cworkspace: need 2*m+n, prefer 2*m+(m+n)*nb) ! (rworkspace: m) call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( iwork ), lwork-iwork+1,ierr ) if( wntuas ) then ! if left singular vectors desired in u, copy result to u ! and generate left bidiagonalizing vectors in u ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvas ) then ! if right singular vectors desired in vt, copy result to ! vt and generate right bidiagonalizing vectors in vt ! (cworkspace: need 2*m+nrvt, prefer 2*m+nrvt*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) if( wntva )nrvt = n if( wntvs )nrvt = m call stdlib${ii}$_${ci}$ungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), & lwork-iwork+1, ierr ) end if if( wntuo ) then ! if left singular vectors desired in a, generate left ! bidiagonalizing vectors in a ! (cworkspace: need 3*m-1, prefer 2*m+(m-1)*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-& iwork+1, ierr ) end if if( wntvo ) then ! if right singular vectors desired in a, generate right ! bidiagonalizing vectors in a ! (cworkspace: need 3*m, prefer 2*m+m*nb) ! (rworkspace: 0) call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-& iwork+1, ierr ) end if irwork = ie + m if( wntuas .or. wntuo )nru = m if( wntun )nru = 0_${ik}$ if( wntvas .or. wntvo )ncvt = n if( wntvn )ncvt = 0_${ik}$ if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, u, ldu, & cdum, 1_${ik}$, rwork( irwork ),info ) else if( ( .not.wntuo ) .and. wntvo ) then ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in u and computing right singular ! vectors in a ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), a,lda, u, ldu, cdum,& 1_${ik}$, rwork( irwork ),info ) else ! perform bidiagonal qr iteration, if desired, computing ! left singular vectors in a and computing right singular ! vectors in vt ! (cworkspace: 0) ! (rworkspace: need bdspac) call stdlib${ii}$_${ci}$bdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, rwork( ie ), vt,ldvt, a, lda, & cdum, 1_${ik}$, rwork( irwork ),info ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = maxwrk return end subroutine stdlib${ii}$_${ci}$gesvd #:endif #:endfor module subroutine stdlib${ii}$_sgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & !! SGESVDQ computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N orthogonal matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: joba, jobp, jobr, jobu, jobv integer(${ik}$), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork integer(${ik}$), intent(out) :: numrank, info integer(${ik}$), intent(inout) :: lwork ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: u(ldu,*), v(ldv,*), work(*) real(sp), intent(out) :: s(*), rwork(*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ierr, iwoff, nr, n1, optratio, p, q integer(${ik}$) :: lwcon, lwqp3, lwrk_sgelqf, lwrk_sgesvd, lwrk_sgesvd2, lwrk_sgeqp3, & lwrk_sgeqrf, lwrk_sormlq, lwrk_sormqr, lwrk_sormqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & lworq, lworq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr real(sp) :: big, epsln, rtmp, sconda, sfmin ! Local Arrays real(sp) :: rdummy(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) wntur = stdlib_lsame( jobu, 'R' ) wntua = stdlib_lsame( jobu, 'A' ) wntuf = stdlib_lsame( jobu, 'F' ) lsvc0 = wntus .or. wntur .or. wntua lsvec = lsvc0 .or. wntuf dntwu = stdlib_lsame( jobu, 'N' ) wntvr = stdlib_lsame( jobv, 'R' ) wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) rsvec = wntvr .or. wntva dntwv = stdlib_lsame( jobv, 'N' ) accla = stdlib_lsame( joba, 'A' ) acclm = stdlib_lsame( joba, 'M' ) conda = stdlib_lsame( joba, 'E' ) acclh = stdlib_lsame( joba, 'H' ) .or. conda rowprm = stdlib_lsame( jobp, 'P' ) rtrans = stdlib_lsame( jobr, 'T' ) if ( rowprm ) then if ( conda ) then iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ + n ) else iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ ) end if rminwrk = max( 2_${ik}$, m ) else if ( conda ) then iminwrk = max( 1_${ik}$, n + n ) else iminwrk = max( 1_${ik}$, n ) end if rminwrk = 2_${ik}$ end if lquery = (liwork == -1_${ik}$ .or. lwork == -1_${ik}$ .or. lrwork == -1_${ik}$) info = 0_${ik}$ if ( .not. ( accla .or. acclm .or. acclh ) ) then info = -1_${ik}$ else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then info = -2_${ik}$ else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then info = -3_${ik}$ else if ( .not.( lsvec .or. dntwu ) ) then info = -4_${ik}$ else if ( wntur .and. wntva ) then info = -5_${ik}$ else if ( .not.( rsvec .or. dntwv )) then info = -5_${ik}$ else if ( m<0_${ik}$ ) then info = -6_${ik}$ else if ( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -7_${ik}$ else if ( lda<max( 1_${ik}$, m ) ) then info = -9_${ik}$ else if ( ldu<1_${ik}$ .or. ( lsvc0 .and. ldu<m ) .or.( wntuf .and. ldu<n ) ) then info = -12_${ik}$ else if ( ldv<1_${ik}$ .or. ( rsvec .and. ldv<n ) .or.( conda .and. ldv<n ) ) then info = -14_${ik}$ else if ( liwork < iminwrk .and. .not. lquery ) then info = -17_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lwork are written with a lot of redundancy and ! can be simplified. however, this detailed form is easier for ! maintenance and modifications of the code.]] ! Minimal Workspace Length For Stdlib_Sgeqp3 Of An M X N Matrix lwqp3 = 3_${ik}$ * n + 1_${ik}$ ! Minimal Workspace Length For Stdlib_Sormqr To Build Left Singular Vectors if ( wntus .or. wntur ) then lworq = max( n , 1_${ik}$ ) else if ( wntua ) then lworq = max( m , 1_${ik}$ ) end if ! Minimal Workspace Length For Stdlib_Spocon Of An N X N Matrix lwcon = 3_${ik}$ * n ! Stdlib_Sgesvd Of An N X N Matrix lwsvd = max( 5_${ik}$ * n, 1_${ik}$ ) if ( lquery ) then call stdlib${ii}$_sgeqp3( m, n, a, lda, iwork, rdummy, rdummy, -1_${ik}$,ierr ) lwrk_sgeqp3 = int( rdummy(1_${ik}$),KIND=${ik}$) if ( wntus .or. wntur ) then call stdlib${ii}$_sormqr( 'L', 'N', m, n, n, a, lda, rdummy, u,ldu, rdummy, -1_${ik}$, & ierr ) lwrk_sormqr = int( rdummy(1_${ik}$),KIND=${ik}$) else if ( wntua ) then call stdlib${ii}$_sormqr( 'L', 'N', m, m, n, a, lda, rdummy, u,ldu, rdummy, -1_${ik}$, & ierr ) lwrk_sormqr = int( rdummy(1_${ik}$),KIND=${ik}$) else lwrk_sormqr = 0_${ik}$ end if end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ if ( .not. (lsvec .or. rsvec )) then ! Minimal And Optimal Sizes Of The Workspace If ! only the singular values are requested if ( conda ) then minwrk = max( n+lwqp3, lwcon, lwsvd ) else minwrk = max( n+lwqp3, lwsvd ) end if if ( lquery ) then call stdlib${ii}$_sgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_sgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = max( n+lwrk_sgeqp3, n+lwcon, lwrk_sgesvd ) else optwrk = max( n+lwrk_sgeqp3, lwrk_sgesvd ) end if end if else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Workspace If The ! singular values and the left singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd, lworq ) else minwrk = n + max( lwqp3, lwsvd, lworq ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_sgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) else call stdlib${ii}$_sgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) end if lwrk_sgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_sgeqp3, lwcon, lwrk_sgesvd,lwrk_sormqr ) else optwrk = n + max( lwrk_sgeqp3, lwrk_sgesvd,lwrk_sormqr ) end if end if else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Workspace If The ! singular values and the right singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd ) else minwrk = n + max( lwqp3, lwsvd ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_sgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -& 1_${ik}$, ierr ) else call stdlib${ii}$_sgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -& 1_${ik}$, ierr ) end if lwrk_sgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_sgeqp3, lwcon, lwrk_sgesvd ) else optwrk = n + max( lwrk_sgeqp3, lwrk_sgesvd ) end if end if else ! Minimal And Optimal Sizes Of The Workspace If The ! full svd is requested if ( rtrans ) then minwrk = max( lwqp3, lwsvd, lworq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n x n/2 stdlib${ii}$_sgeqrf lwqrf = max( n/2_${ik}$, 1_${ik}$ ) ! .. minimal workspace length for n/2 x n/2 stdlib${ii}$_sgesvd lwsvd2 = max( 5_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lworq2 = max( n, 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwqrf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lworq2, lworq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if else minwrk = max( lwqp3, lwsvd, lworq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n/2 x n stdlib${ii}$_sgelqf lwlqf = max( n/2_${ik}$, 1_${ik}$ ) lwsvd2 = max( 5_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunlq = max( n , 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwlqf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunlq, lworq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_sgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_sgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_sgeqp3,lwrk_sgesvd,lwrk_sormqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_sgeqrf(n,n/2_${ik}$,u,ldu,rdummy,rdummy,-1_${ik}$,ierr) lwrk_sgeqrf = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgesvd( 'S', 'O', n/2_${ik}$,n/2_${ik}$, v,ldv, s, u,ldu,v, ldv, rdummy,& -1_${ik}$, ierr ) lwrk_sgesvd2 = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormqr( 'R', 'C', n, n, n/2_${ik}$, u, ldu, rdummy,v, ldv, & rdummy, -1_${ik}$, ierr ) lwrk_sormqr2 = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_sgeqp3, n/2_${ik}$+lwrk_sgeqrf,n/2_${ik}$+lwrk_sgesvd2, n/2_${ik}$+& lwrk_sormqr2 ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if else call stdlib${ii}$_sgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_sgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_sgeqp3,lwrk_sgesvd,lwrk_sormqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_sgelqf(n/2_${ik}$,n,u,ldu,rdummy,rdummy,-1_${ik}$,ierr) lwrk_sgelqf = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgesvd( 'S','O', n/2_${ik}$,n/2_${ik}$, v, ldv, s, u, ldu,v, ldv, rdummy,& -1_${ik}$, ierr ) lwrk_sgesvd2 = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormlq( 'R', 'N', n, n, n/2_${ik}$, u, ldu, rdummy,v, ldv, rdummy,& -1_${ik}$,ierr ) lwrk_sormlq = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_sgeqp3, n/2_${ik}$+lwrk_sgelqf,n/2_${ik}$+lwrk_sgesvd2, n/2_${ik}$+& lwrk_sormlq ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if end if end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( 2_${ik}$, optwrk ) if ( lwork < minwrk .and. (.not.lquery) ) info = -19_${ik}$ end if if (info == 0_${ik}$ .and. lrwork < rminwrk .and. .not. lquery) then info = -21_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESVDQ', -info ) return else if ( lquery ) then ! return optimal workspace iwork(1_${ik}$) = iminwrk work(1_${ik}$) = optwrk work(2_${ik}$) = minwrk rwork(1_${ik}$) = rminwrk return end if ! quick return if the matrix is void. if( ( m==0_${ik}$ ) .or. ( n==0_${ik}$ ) ) then ! All Output Is Void return end if big = stdlib${ii}$_slamch('O') ascaled = .false. iwoff = 1_${ik}$ if ( rowprm ) then iwoff = m ! Reordering The Rows In Decreasing Sequence In The ! ell-infinity norm - this enhances numerical robustness in ! the case of differently scaled rows. do p = 1, m ! rwork(p) = abs( a(p,stdlib${ii}$_icamax(n,a(p,1),lda)) ) ! [[stdlib${ii}$_slange will return nan if an entry of the p-th row is nan]] rwork(p) = stdlib${ii}$_slange( 'M', 1_${ik}$, n, a(p,1_${ik}$), lda, rdummy ) ! .. check for nan's and inf's if ( ( rwork(p) /= rwork(p) ) .or.( (rwork(p)*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'SGESVDQ', -info ) return end if end do do p = 1, m - 1 q = stdlib${ii}$_isamax( m-p+1, rwork(p), 1_${ik}$ ) + p - 1_${ik}$ iwork(n+p) = q if ( p /= q ) then rtmp = rwork(p) rwork(p) = rwork(q) rwork(q) = rtmp end if end do if ( rwork(1_${ik}$) == zero ) then ! quick return: a is the m x n zero matrix. numrank = 0_${ik}$ call stdlib${ii}$_slaset( 'G', n, 1_${ik}$, zero, zero, s, n ) if ( wntus ) call stdlib${ii}$_slaset('G', m, n, zero, one, u, ldu) if ( wntua ) call stdlib${ii}$_slaset('G', m, m, zero, one, u, ldu) if ( wntva ) call stdlib${ii}$_slaset('G', n, n, zero, one, v, ldv) if ( wntuf ) then call stdlib${ii}$_slaset( 'G', n, 1_${ik}$, zero, zero, work, n ) call stdlib${ii}$_slaset( 'G', m, n, zero, one, u, ldu ) end if do p = 1, n iwork(p) = p end do if ( rowprm ) then do p = n + 1, n + m - 1 iwork(p) = p - n end do end if if ( conda ) rwork(1_${ik}$) = -1_${ik}$ rwork(2_${ik}$) = -1_${ik}$ return end if if ( rwork(1_${ik}$) > big / sqrt(real(m,KIND=sp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_slascl('G',0_${ik}$,0_${ik}$,sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) ascaled = .true. end if call stdlib${ii}$_slaswp( n, a, lda, 1_${ik}$, m-1, iwork(n+1), 1_${ik}$ ) end if ! .. at this stage, preemptive scaling is done only to avoid column ! norms overflows during the qr factorization. the svd procedure should ! have its own scaling to save the singular values from overflows and ! underflows. that depends on the svd procedure. if ( .not.rowprm ) then rtmp = stdlib${ii}$_slange( 'M', m, n, a, lda, rdummy ) if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'SGESVDQ', -info ) return end if if ( rtmp > big / sqrt(real(m,KIND=sp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_slascl('G',0_${ik}$,0_${ik}$, sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) ascaled = .true. end if end if ! Qr Factorization With Column Pivoting ! a * p = q * [ r ] ! [ 0 ] do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_sgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,ierr ) ! if the user requested accuracy level allows truncation in the ! computed upper triangular factor, the matrix r is examined and, ! if possible, replaced with its leading upper trapezoidal part. epsln = stdlib${ii}$_slamch('E') sfmin = stdlib${ii}$_slamch('S') ! small = sfmin / epsln nr = n if ( accla ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*eps*||a||_f are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*eps*||a||_f. nr = 1_${ik}$ rtmp = sqrt(real(n,KIND=sp))*epsln do p = 2, n if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) go to 3002 nr = nr + 1_${ik}$ end do 3002 continue elseif ( acclm ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r is used as the criterion for being ! close-to-rank-deficient. the threshold is set to epsln=stdlib${ii}$_slamch('e'). ! [[this can be made more flexible by replacing this hard-coded value ! with a user specified threshold.]] also, the values that underflow ! will be truncated. nr = 1_${ik}$ do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) go to 3402 nr = nr + 1_${ik}$ end do 3402 continue else ! Rrqr Not Authorized To Determine Numerical Rank Except In The ! obvious case of zero pivots. ! .. inspect r for exact zeros on the diagonal; ! r(i,i)=0 => r(i:n,i:n)=0. nr = 1_${ik}$ do p = 2, n if ( abs(a(p,p)) == zero ) go to 3502 nr = nr + 1_${ik}$ end do 3502 continue if ( conda ) then ! estimate the scaled condition number of a. use the fact that it is ! the same as the scaled condition number of r. ! V Is Used As Workspace call stdlib${ii}$_slacpy( 'U', n, n, a, lda, v, ldv ) ! only the leading nr x nr submatrix of the triangular factor ! is considered. only if nr=n will this give a reliable error ! bound. however, even for nr < n, this can be used on an ! expert level and obtain useful information in the sense of ! perturbation theory. do p = 1, nr rtmp = stdlib${ii}$_snrm2( p, v(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_sscal( p, one/rtmp, v(1_${ik}$,p), 1_${ik}$ ) end do if ( .not. ( lsvec .or. rsvec ) ) then call stdlib${ii}$_spocon( 'U', nr, v, ldv, one, rtmp,work, iwork(n+iwoff), ierr & ) else call stdlib${ii}$_spocon( 'U', nr, v, ldv, one, rtmp,work(n+1), iwork(n+iwoff), & ierr ) end if sconda = one / sqrt(rtmp) ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda ! see the reference [1] for more details. end if endif if ( wntur ) then n1 = nr else if ( wntus .or. wntuf) then n1 = n else if ( wntua ) then n1 = m end if if ( .not. ( rsvec .or. lsvec ) ) then ! ....................................................................... ! Only The Singular Values Are Requested ! ....................................................................... if ( rtrans ) then ! .. compute the singular values of r**t = [a](1:nr,1:n)**t ! .. set the lower triangle of [a] to [a](1:nr,1:n)**t and ! the upper triangle of [a] to zero. do p = 1, min( n, nr ) do q = p + 1, n a(q,p) = a(p,q) if ( q <= nr ) a(p,q) = zero end do end do call stdlib${ii}$_sgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, work, lwork, info & ) else ! .. compute the singular values of r = [a](1:nr,1:n) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'L', nr-1,nr-1, zero,zero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_sgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, work, lwork, info & ) end if else if ( lsvec .and. ( .not. rsvec) ) then ! ....................................................................... ! The Singular Values And The Left Singular Vectors Requested ! ......................................................................."""""""" if ( rtrans ) then ! .. apply stdlib${ii}$_sgesvd to r**t ! .. copy r**t into [u] and overwrite [u] with the right singular ! vectors of r do p = 1, nr do q = p, n u(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'U', nr-1,nr-1, zero,zero, u(1_${ik}$,2_${ik}$), ldu ) ! .. the left singular vectors not computed, the nr right singular ! vectors overwrite [u](1:nr,1:nr) as transposed. these ! will be pre-multiplied by q to build the left singular vectors of a. call stdlib${ii}$_sgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, nr do q = p + 1, nr rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do else ! Apply Stdlib_Sgesvd To R ! .. copy r into [u] and overwrite [u] with the left singular vectors call stdlib${ii}$_slacpy( 'U', nr, n, a, lda, u, ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'L', nr-1, nr-1, zero, zero, u(2_${ik}$,1_${ik}$), ldu ) ! .. the right singular vectors not computed, the nr left singular ! vectors overwrite [u](1:nr,1:nr) call stdlib${ii}$_sgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of ! r. these will be pre-multiplied by q to build the left singular ! vectors of a. end if ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. ( .not.wntuf ) ) then call stdlib${ii}$_slaset('A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_slaset( 'A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not.wntuf )call stdlib${ii}$_sormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& n+1), lwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_slaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) else if ( rsvec .and. ( .not. lsvec ) ) then ! ....................................................................... ! The Singular Values And The Right Singular Vectors Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_sgesvd to r**t ! .. copy r**t into v and overwrite v with the left singular vectors do p = 1, nr do q = p, n v(q,p) = (a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**t overwrite v, the right singular ! vectors not computed if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_sgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, nr do q = p + 1, nr rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do if ( nr < n ) then do p = 1, nr do q = nr + 1, n v(p,q) = v(q,p) end do end do end if call stdlib${ii}$_slapmt( .false., nr, n, v, ldv, iwork ) else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:n,1:nr) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the qr factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_slaset('G', n, n-nr, zero, zero, v(1_${ik}$,nr+1), ldv) call stdlib${ii}$_sgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, n do q = p + 1, n rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do call stdlib${ii}$_slapmt( .false., n, n, v, ldv, iwork ) end if else ! Aply Stdlib_Sgesvd To R ! Copy R Into V And Overwrite V With The Right Singular Vectors call stdlib${ii}$_slacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'L', nr-1, nr-1, zero, zero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors overwrite v, the nr left singular ! vectors stored in u(1:nr,1:nr) if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_sgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_slapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:nr,1:n) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the lq factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_slaset('G', n-nr, n, zero,zero, v(nr+1,1_${ik}$), ldv) call stdlib${ii}$_sgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_slapmt( .false., n, n, v, ldv, iwork ) end if ! .. now [v] contains the transposed matrix of the right singular ! vectors of a. end if else ! ....................................................................... ! Full Svd Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_sgesvd to r**t [[this option is left for r if ( wntvr .or. ( nr == n ) ) then ! .. copy r**t into [v] and overwrite [v] with the left singular ! vectors of r**t do p = 1, nr do q = p, n v(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**t overwrite [v], the nr right ! singular vectors of r**t stored in [u](1:nr,1:nr) as transposed call stdlib${ii}$_sgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, work(n+1), & lwork-n, info ) ! Assemble V do p = 1, nr do q = p + 1, nr rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do if ( nr < n ) then do p = 1, nr do q = nr+1, n v(p,q) = v(q,p) end do end do end if call stdlib${ii}$_slapmt( .false., nr, n, v, ldv, iwork ) do p = 1, nr do q = p + 1, nr rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_slaset('A', m-nr,nr, zero,zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_slaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! .. copy r**t into [v] and overwrite [v] with the left singular ! vectors of r**t ! [[the optimal ratio n/nr for using qrf instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'sgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio*nr > n ) then do p = 1, nr do q = p, n v(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset('U',nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_slaset('A',n,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_sgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, work(n+1), & lwork-n, info ) do p = 1, n do q = p + 1, n rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do call stdlib${ii}$_slapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). do p = 1, n do q = p + 1, n rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_slaset('A',m-n,n,zero,zero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_slaset('A',n,n1-n,zero,zero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_slaset('A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) end if end if else ! .. copy r**t into [u] and overwrite [u] with the right ! singular vectors of r do p = 1, nr do q = p, n u(q,nr+p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset('U',nr-1,nr-1,zero,zero,u(1_${ik}$,nr+2),ldu) call stdlib${ii}$_sgeqrf( n, nr, u(1_${ik}$,nr+1), ldu, work(n+1),work(n+nr+1), lwork-& n-nr, ierr ) do p = 1, nr do q = 1, n v(q,p) = u(p,nr+q) end do end do call stdlib${ii}$_slaset('U',nr-1,nr-1,zero,zero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_sgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, work(n+nr+1)& ,lwork-n-nr, info ) call stdlib${ii}$_slaset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_slaset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) call stdlib${ii}$_sormqr('R','C', n, n, nr, u(1_${ik}$,nr+1), ldu,work(n+1),v,ldv,work(& n+nr+1),lwork-n-nr,ierr) call stdlib${ii}$_slapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_slaset('A',m-nr,nr,zero,zero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_slaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) end if end if end if end if else ! .. apply stdlib${ii}$_sgesvd to r [[this is the recommended option]] if ( wntvr .or. ( nr == n ) ) then ! .. copy r into [v] and overwrite v with the right singular vectors call stdlib${ii}$_slacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset( 'L', nr-1,nr-1, zero,zero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_sgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_slapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_slaset('A', m-nr,nr, zero,zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_slaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! The Requested Number Of The Left Singular Vectors ! is then n1 (n or m) ! [[the optimal ratio n/nr for using lq instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'sgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio * nr > n ) then call stdlib${ii}$_slacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset('L', nr-1,nr-1, zero,zero, v(2_${ik}$,1_${ik}$),ldv) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_slaset('A', n-nr,n, zero,zero, v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_sgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_slapmt( .false., n, n, v, ldv, iwork ) ! .. now [v] contains the transposed matrix of the right ! singular vectors of a. the leading n left singular vectors ! are in [u](1:n,1:n) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_slaset('A',m-n,n,zero,zero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_slaset('A',n,n1-n,zero,zero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_slaset( 'A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) end if end if else call stdlib${ii}$_slacpy( 'U', nr, n, a, lda, u(nr+1,1_${ik}$), ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset('L',nr-1,nr-1,zero,zero,u(nr+2,1_${ik}$),ldu) call stdlib${ii}$_sgelqf( nr, n, u(nr+1,1_${ik}$), ldu, work(n+1),work(n+nr+1), lwork-n-& nr, ierr ) call stdlib${ii}$_slacpy('L',nr,nr,u(nr+1,1_${ik}$),ldu,v,ldv) if ( nr > 1_${ik}$ )call stdlib${ii}$_slaset('U',nr-1,nr-1,zero,zero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_sgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, work(n+nr+& 1_${ik}$), lwork-n-nr, info ) call stdlib${ii}$_slaset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_slaset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) call stdlib${ii}$_sormlq('R','N',n,n,nr,u(nr+1,1_${ik}$),ldu,work(n+1),v, ldv, work(n+& nr+1),lwork-n-nr,ierr) call stdlib${ii}$_slapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_slaset('A',m-nr,nr,zero,zero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_slaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if end if end if ! .. end of the "r**t or r" branch end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not. wntuf )call stdlib${ii}$_sormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& n+1), lwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_slaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) ! ... end of the "full svd" branch end if ! check whether some singular values are returned as zeros, e.g. ! due to underflow, and update the numerical rank. p = nr do q = p, 1, -1 if ( s(q) > zero ) go to 4002 nr = nr - 1_${ik}$ end do 4002 continue ! .. if numerical rank deficiency is detected, the truncated ! singular values are set to zero. if ( nr < n ) call stdlib${ii}$_slaset( 'G', n-nr,1_${ik}$, zero,zero, s(nr+1), n ) ! .. undo scaling; this may cause overflow in the largest singular ! values. if ( ascaled )call stdlib${ii}$_slascl( 'G',0_${ik}$,0_${ik}$, one,sqrt(real(m,KIND=sp)), nr,1_${ik}$, s, n, ierr & ) if ( conda ) rwork(1_${ik}$) = sconda rwork(2_${ik}$) = p - nr ! .. p-nr is the number of singular values that are computed as ! exact zeros in stdlib${ii}$_sgesvd() applied to the (possibly truncated) ! full row rank triangular (trapezoidal) factor of a. numrank = nr return end subroutine stdlib${ii}$_sgesvdq module subroutine stdlib${ii}$_dgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & !! DGESVDQ computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N orthogonal matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: joba, jobp, jobr, jobu, jobv integer(${ik}$), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork integer(${ik}$), intent(out) :: numrank, info integer(${ik}$), intent(inout) :: lwork ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: u(ldu,*), v(ldv,*), work(*) real(dp), intent(out) :: s(*), rwork(*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ierr, iwoff, nr, n1, optratio, p, q integer(${ik}$) :: lwcon, lwqp3, lwrk_dgelqf, lwrk_dgesvd, lwrk_dgesvd2, lwrk_dgeqp3, & lwrk_dgeqrf, lwrk_dormlq, lwrk_dormqr, lwrk_dormqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & lworq, lworq2, lworlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr real(dp) :: big, epsln, rtmp, sconda, sfmin ! Local Arrays real(dp) :: rdummy(1_${ik}$) ! Intrinsic Functions ! test the input arguments wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) wntur = stdlib_lsame( jobu, 'R' ) wntua = stdlib_lsame( jobu, 'A' ) wntuf = stdlib_lsame( jobu, 'F' ) lsvc0 = wntus .or. wntur .or. wntua lsvec = lsvc0 .or. wntuf dntwu = stdlib_lsame( jobu, 'N' ) wntvr = stdlib_lsame( jobv, 'R' ) wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) rsvec = wntvr .or. wntva dntwv = stdlib_lsame( jobv, 'N' ) accla = stdlib_lsame( joba, 'A' ) acclm = stdlib_lsame( joba, 'M' ) conda = stdlib_lsame( joba, 'E' ) acclh = stdlib_lsame( joba, 'H' ) .or. conda rowprm = stdlib_lsame( jobp, 'P' ) rtrans = stdlib_lsame( jobr, 'T' ) if ( rowprm ) then if ( conda ) then iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ + n ) else iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ ) end if rminwrk = max( 2_${ik}$, m ) else if ( conda ) then iminwrk = max( 1_${ik}$, n + n ) else iminwrk = max( 1_${ik}$, n ) end if rminwrk = 2_${ik}$ end if lquery = (liwork == -1_${ik}$ .or. lwork == -1_${ik}$ .or. lrwork == -1_${ik}$) info = 0_${ik}$ if ( .not. ( accla .or. acclm .or. acclh ) ) then info = -1_${ik}$ else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then info = -2_${ik}$ else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then info = -3_${ik}$ else if ( .not.( lsvec .or. dntwu ) ) then info = -4_${ik}$ else if ( wntur .and. wntva ) then info = -5_${ik}$ else if ( .not.( rsvec .or. dntwv )) then info = -5_${ik}$ else if ( m<0_${ik}$ ) then info = -6_${ik}$ else if ( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -7_${ik}$ else if ( lda<max( 1_${ik}$, m ) ) then info = -9_${ik}$ else if ( ldu<1_${ik}$ .or. ( lsvc0 .and. ldu<m ) .or.( wntuf .and. ldu<n ) ) then info = -12_${ik}$ else if ( ldv<1_${ik}$ .or. ( rsvec .and. ldv<n ) .or.( conda .and. ldv<n ) ) then info = -14_${ik}$ else if ( liwork < iminwrk .and. .not. lquery ) then info = -17_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lwork are written with a lot of redundancy and ! can be simplified. however, this detailed form is easier for ! maintenance and modifications of the code.]] ! Minimal Workspace Length For Stdlib_Dgeqp3 Of An M X N Matrix lwqp3 = 3_${ik}$ * n + 1_${ik}$ ! Minimal Workspace Length For Stdlib_Dormqr To Build Left Singular Vectors if ( wntus .or. wntur ) then lworq = max( n , 1_${ik}$ ) else if ( wntua ) then lworq = max( m , 1_${ik}$ ) end if ! Minimal Workspace Length For Stdlib_Dpocon Of An N X N Matrix lwcon = 3_${ik}$ * n ! Stdlib_Dgesvd Of An N X N Matrix lwsvd = max( 5_${ik}$ * n, 1_${ik}$ ) if ( lquery ) then call stdlib${ii}$_dgeqp3( m, n, a, lda, iwork, rdummy, rdummy, -1_${ik}$,ierr ) lwrk_dgeqp3 = int( rdummy(1_${ik}$),KIND=${ik}$) if ( wntus .or. wntur ) then call stdlib${ii}$_dormqr( 'L', 'N', m, n, n, a, lda, rdummy, u,ldu, rdummy, -1_${ik}$, & ierr ) lwrk_dormqr = int( rdummy(1_${ik}$),KIND=${ik}$) else if ( wntua ) then call stdlib${ii}$_dormqr( 'L', 'N', m, m, n, a, lda, rdummy, u,ldu, rdummy, -1_${ik}$, & ierr ) lwrk_dormqr = int( rdummy(1_${ik}$),KIND=${ik}$) else lwrk_dormqr = 0_${ik}$ end if end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ if ( .not. (lsvec .or. rsvec )) then ! Minimal And Optimal Sizes Of The Workspace If ! only the singular values are requested if ( conda ) then minwrk = max( n+lwqp3, lwcon, lwsvd ) else minwrk = max( n+lwqp3, lwsvd ) end if if ( lquery ) then call stdlib${ii}$_dgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_dgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = max( n+lwrk_dgeqp3, n+lwcon, lwrk_dgesvd ) else optwrk = max( n+lwrk_dgeqp3, lwrk_dgesvd ) end if end if else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Workspace If The ! singular values and the left singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd, lworq ) else minwrk = n + max( lwqp3, lwsvd, lworq ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_dgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) else call stdlib${ii}$_dgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) end if lwrk_dgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_dgeqp3, lwcon, lwrk_dgesvd,lwrk_dormqr ) else optwrk = n + max( lwrk_dgeqp3, lwrk_dgesvd,lwrk_dormqr ) end if end if else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Workspace If The ! singular values and the right singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd ) else minwrk = n + max( lwqp3, lwsvd ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_dgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -& 1_${ik}$, ierr ) else call stdlib${ii}$_dgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -& 1_${ik}$, ierr ) end if lwrk_dgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_dgeqp3, lwcon, lwrk_dgesvd ) else optwrk = n + max( lwrk_dgeqp3, lwrk_dgesvd ) end if end if else ! Minimal And Optimal Sizes Of The Workspace If The ! full svd is requested if ( rtrans ) then minwrk = max( lwqp3, lwsvd, lworq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n x n/2 stdlib${ii}$_dgeqrf lwqrf = max( n/2_${ik}$, 1_${ik}$ ) ! .. minimal workspace length for n/2 x n/2 stdlib${ii}$_dgesvd lwsvd2 = max( 5_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lworq2 = max( n, 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwqrf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lworq2, lworq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if else minwrk = max( lwqp3, lwsvd, lworq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n/2 x n stdlib${ii}$_dgelqf lwlqf = max( n/2_${ik}$, 1_${ik}$ ) lwsvd2 = max( 5_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lworlq = max( n , 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwlqf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lworlq, lworq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_dgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_dgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_dgeqp3,lwrk_dgesvd,lwrk_dormqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_dgeqrf(n,n/2_${ik}$,u,ldu,rdummy,rdummy,-1_${ik}$,ierr) lwrk_dgeqrf = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgesvd( 'S', 'O', n/2_${ik}$,n/2_${ik}$, v,ldv, s, u,ldu,v, ldv, rdummy,& -1_${ik}$, ierr ) lwrk_dgesvd2 = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormqr( 'R', 'C', n, n, n/2_${ik}$, u, ldu, rdummy,v, ldv, & rdummy, -1_${ik}$, ierr ) lwrk_dormqr2 = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_dgeqp3, n/2_${ik}$+lwrk_dgeqrf,n/2_${ik}$+lwrk_dgesvd2, n/2_${ik}$+& lwrk_dormqr2 ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if else call stdlib${ii}$_dgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_dgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_dgeqp3,lwrk_dgesvd,lwrk_dormqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_dgelqf(n/2_${ik}$,n,u,ldu,rdummy,rdummy,-1_${ik}$,ierr) lwrk_dgelqf = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgesvd( 'S','O', n/2_${ik}$,n/2_${ik}$, v, ldv, s, u, ldu,v, ldv, rdummy,& -1_${ik}$, ierr ) lwrk_dgesvd2 = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormlq( 'R', 'N', n, n, n/2_${ik}$, u, ldu, rdummy,v, ldv, rdummy,& -1_${ik}$,ierr ) lwrk_dormlq = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_dgeqp3, n/2_${ik}$+lwrk_dgelqf,n/2_${ik}$+lwrk_dgesvd2, n/2_${ik}$+& lwrk_dormlq ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if end if end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( 2_${ik}$, optwrk ) if ( lwork < minwrk .and. (.not.lquery) ) info = -19_${ik}$ end if if (info == 0_${ik}$ .and. lrwork < rminwrk .and. .not. lquery) then info = -21_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVDQ', -info ) return else if ( lquery ) then ! return optimal workspace iwork(1_${ik}$) = iminwrk work(1_${ik}$) = optwrk work(2_${ik}$) = minwrk rwork(1_${ik}$) = rminwrk return end if ! quick return if the matrix is void. if( ( m==0_${ik}$ ) .or. ( n==0_${ik}$ ) ) then ! All Output Is Void return end if big = stdlib${ii}$_dlamch('O') ascaled = .false. iwoff = 1_${ik}$ if ( rowprm ) then iwoff = m ! Reordering The Rows In Decreasing Sequence In The ! ell-infinity norm - this enhances numerical robustness in ! the case of differently scaled rows. do p = 1, m ! rwork(p) = abs( a(p,stdlib${ii}$_icamax(n,a(p,1),lda)) ) ! [[stdlib${ii}$_dlange will return nan if an entry of the p-th row is nan]] rwork(p) = stdlib${ii}$_dlange( 'M', 1_${ik}$, n, a(p,1_${ik}$), lda, rdummy ) ! .. check for nan's and inf's if ( ( rwork(p) /= rwork(p) ) .or.( (rwork(p)*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'DGESVDQ', -info ) return end if end do do p = 1, m - 1 q = stdlib${ii}$_idamax( m-p+1, rwork(p), 1_${ik}$ ) + p - 1_${ik}$ iwork(n+p) = q if ( p /= q ) then rtmp = rwork(p) rwork(p) = rwork(q) rwork(q) = rtmp end if end do if ( rwork(1_${ik}$) == zero ) then ! quick return: a is the m x n zero matrix. numrank = 0_${ik}$ call stdlib${ii}$_dlaset( 'G', n, 1_${ik}$, zero, zero, s, n ) if ( wntus ) call stdlib${ii}$_dlaset('G', m, n, zero, one, u, ldu) if ( wntua ) call stdlib${ii}$_dlaset('G', m, m, zero, one, u, ldu) if ( wntva ) call stdlib${ii}$_dlaset('G', n, n, zero, one, v, ldv) if ( wntuf ) then call stdlib${ii}$_dlaset( 'G', n, 1_${ik}$, zero, zero, work, n ) call stdlib${ii}$_dlaset( 'G', m, n, zero, one, u, ldu ) end if do p = 1, n iwork(p) = p end do if ( rowprm ) then do p = n + 1, n + m - 1 iwork(p) = p - n end do end if if ( conda ) rwork(1_${ik}$) = -1_${ik}$ rwork(2_${ik}$) = -1_${ik}$ return end if if ( rwork(1_${ik}$) > big / sqrt(real(m,KIND=dp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_dlascl('G',0_${ik}$,0_${ik}$,sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) ascaled = .true. end if call stdlib${ii}$_dlaswp( n, a, lda, 1_${ik}$, m-1, iwork(n+1), 1_${ik}$ ) end if ! .. at this stage, preemptive scaling is done only to avoid column ! norms overflows during the qr factorization. the svd procedure should ! have its own scaling to save the singular values from overflows and ! underflows. that depends on the svd procedure. if ( .not.rowprm ) then rtmp = stdlib${ii}$_dlange( 'M', m, n, a, lda, rdummy ) if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'DGESVDQ', -info ) return end if if ( rtmp > big / sqrt(real(m,KIND=dp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_dlascl('G',0_${ik}$,0_${ik}$, sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) ascaled = .true. end if end if ! Qr Factorization With Column Pivoting ! a * p = q * [ r ] ! [ 0 ] do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_dgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,ierr ) ! if the user requested accuracy level allows truncation in the ! computed upper triangular factor, the matrix r is examined and, ! if possible, replaced with its leading upper trapezoidal part. epsln = stdlib${ii}$_dlamch('E') sfmin = stdlib${ii}$_dlamch('S') ! small = sfmin / epsln nr = n if ( accla ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*eps*||a||_f are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*eps*||a||_f. nr = 1_${ik}$ rtmp = sqrt(real(n,KIND=dp))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) exit loop_3002 nr = nr + 1_${ik}$ end do loop_3002 elseif ( acclm ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r is used as the criterion for being ! close-to-rank-deficient. the threshold is set to epsln=stdlib${ii}$_dlamch('e'). ! [[this can be made more flexible by replacing this hard-coded value ! with a user specified threshold.]] also, the values that underflow ! will be truncated. nr = 1_${ik}$ loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! Rrqr Not Authorized To Determine Numerical Rank Except In The ! obvious case of zero pivots. ! .. inspect r for exact zeros on the diagonal; ! r(i,i)=0 => r(i:n,i:n)=0. nr = 1_${ik}$ loop_3502: do p = 2, n if ( abs(a(p,p)) == zero ) exit loop_3502 nr = nr + 1_${ik}$ end do loop_3502 if ( conda ) then ! estimate the scaled condition number of a. use the fact that it is ! the same as the scaled condition number of r. ! V Is Used As Workspace call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, v, ldv ) ! only the leading nr x nr submatrix of the triangular factor ! is considered. only if nr=n will this give a reliable error ! bound. however, even for nr < n, this can be used on an ! expert level and obtain useful information in the sense of ! perturbation theory. do p = 1, nr rtmp = stdlib${ii}$_dnrm2( p, v(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_dscal( p, one/rtmp, v(1_${ik}$,p), 1_${ik}$ ) end do if ( .not. ( lsvec .or. rsvec ) ) then call stdlib${ii}$_dpocon( 'U', nr, v, ldv, one, rtmp,work, iwork(n+iwoff), ierr & ) else call stdlib${ii}$_dpocon( 'U', nr, v, ldv, one, rtmp,work(n+1), iwork(n+iwoff), & ierr ) end if sconda = one / sqrt(rtmp) ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda ! see the reference [1] for more details. end if endif if ( wntur ) then n1 = nr else if ( wntus .or. wntuf) then n1 = n else if ( wntua ) then n1 = m end if if ( .not. ( rsvec .or. lsvec ) ) then ! ....................................................................... ! Only The Singular Values Are Requested ! ....................................................................... if ( rtrans ) then ! .. compute the singular values of r**t = [a](1:nr,1:n)**t ! .. set the lower triangle of [a] to [a](1:nr,1:n)**t and ! the upper triangle of [a] to zero. do p = 1, min( n, nr ) do q = p + 1, n a(q,p) = a(p,q) if ( q <= nr ) a(p,q) = zero end do end do call stdlib${ii}$_dgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, work, lwork, info & ) else ! .. compute the singular values of r = [a](1:nr,1:n) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'L', nr-1,nr-1, zero,zero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_dgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, work, lwork, info & ) end if else if ( lsvec .and. ( .not. rsvec) ) then ! ....................................................................... ! The Singular Values And The Left Singular Vectors Requested ! ......................................................................."""""""" if ( rtrans ) then ! .. apply stdlib${ii}$_dgesvd to r**t ! .. copy r**t into [u] and overwrite [u] with the right singular ! vectors of r do p = 1, nr do q = p, n u(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'U', nr-1,nr-1, zero,zero, u(1_${ik}$,2_${ik}$), ldu ) ! .. the left singular vectors not computed, the nr right singular ! vectors overwrite [u](1:nr,1:nr) as transposed. these ! will be pre-multiplied by q to build the left singular vectors of a. call stdlib${ii}$_dgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, nr do q = p + 1, nr rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do else ! Apply Stdlib_Dgesvd To R ! .. copy r into [u] and overwrite [u] with the left singular vectors call stdlib${ii}$_dlacpy( 'U', nr, n, a, lda, u, ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'L', nr-1, nr-1, zero, zero, u(2_${ik}$,1_${ik}$), ldu ) ! .. the right singular vectors not computed, the nr left singular ! vectors overwrite [u](1:nr,1:nr) call stdlib${ii}$_dgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of ! r. these will be pre-multiplied by q to build the left singular ! vectors of a. end if ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. ( .not.wntuf ) ) then call stdlib${ii}$_dlaset('A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_dlaset( 'A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not.wntuf )call stdlib${ii}$_dormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& n+1), lwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_dlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) else if ( rsvec .and. ( .not. lsvec ) ) then ! ....................................................................... ! The Singular Values And The Right Singular Vectors Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_dgesvd to r**t ! .. copy r**t into v and overwrite v with the left singular vectors do p = 1, nr do q = p, n v(q,p) = (a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**t overwrite v, the right singular ! vectors not computed if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_dgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, nr do q = p + 1, nr rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do if ( nr < n ) then do p = 1, nr do q = nr + 1, n v(p,q) = v(q,p) end do end do end if call stdlib${ii}$_dlapmt( .false., nr, n, v, ldv, iwork ) else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:n,1:nr) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the qr factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_dlaset('G', n, n-nr, zero, zero, v(1_${ik}$,nr+1), ldv) call stdlib${ii}$_dgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, n do q = p + 1, n rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do call stdlib${ii}$_dlapmt( .false., n, n, v, ldv, iwork ) end if else ! Aply Stdlib_Dgesvd To R ! Copy R Into V And Overwrite V With The Right Singular Vectors call stdlib${ii}$_dlacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'L', nr-1, nr-1, zero, zero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors overwrite v, the nr left singular ! vectors stored in u(1:nr,1:nr) if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_dgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_dlapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:nr,1:n) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the lq factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_dlaset('G', n-nr, n, zero,zero, v(nr+1,1_${ik}$), ldv) call stdlib${ii}$_dgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_dlapmt( .false., n, n, v, ldv, iwork ) end if ! .. now [v] contains the transposed matrix of the right singular ! vectors of a. end if else ! ....................................................................... ! Full Svd Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_dgesvd to r**t [[this option is left for r if ( wntvr .or. ( nr == n ) ) then ! .. copy r**t into [v] and overwrite [v] with the left singular ! vectors of r**t do p = 1, nr do q = p, n v(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**t overwrite [v], the nr right ! singular vectors of r**t stored in [u](1:nr,1:nr) as transposed call stdlib${ii}$_dgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, work(n+1), & lwork-n, info ) ! Assemble V do p = 1, nr do q = p + 1, nr rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do if ( nr < n ) then do p = 1, nr do q = nr+1, n v(p,q) = v(q,p) end do end do end if call stdlib${ii}$_dlapmt( .false., nr, n, v, ldv, iwork ) do p = 1, nr do q = p + 1, nr rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_dlaset('A', m-nr,nr, zero,zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_dlaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! .. copy r**t into [v] and overwrite [v] with the left singular ! vectors of r**t ! [[the optimal ratio n/nr for using qrf instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio*nr > n ) then do p = 1, nr do q = p, n v(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset('U',nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_dlaset('A',n,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_dgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, work(n+1), & lwork-n, info ) do p = 1, n do q = p + 1, n rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do call stdlib${ii}$_dlapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). do p = 1, n do q = p + 1, n rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_dlaset('A',m-n,n,zero,zero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_dlaset('A',n,n1-n,zero,zero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_dlaset('A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) end if end if else ! .. copy r**t into [u] and overwrite [u] with the right ! singular vectors of r do p = 1, nr do q = p, n u(q,nr+p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset('U',nr-1,nr-1,zero,zero,u(1_${ik}$,nr+2),ldu) call stdlib${ii}$_dgeqrf( n, nr, u(1_${ik}$,nr+1), ldu, work(n+1),work(n+nr+1), lwork-& n-nr, ierr ) do p = 1, nr do q = 1, n v(q,p) = u(p,nr+q) end do end do if (nr>1_${ik}$) call stdlib${ii}$_dlaset('U',nr-1,nr-1,zero,zero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_dgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, work(n+nr+1)& ,lwork-n-nr, info ) call stdlib${ii}$_dlaset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_dlaset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) call stdlib${ii}$_dormqr('R','C', n, n, nr, u(1_${ik}$,nr+1), ldu,work(n+1),v,ldv,work(& n+nr+1),lwork-n-nr,ierr) call stdlib${ii}$_dlapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_dlaset('A',m-nr,nr,zero,zero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_dlaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) end if end if end if end if else ! .. apply stdlib${ii}$_dgesvd to r [[this is the recommended option]] if ( wntvr .or. ( nr == n ) ) then ! .. copy r into [v] and overwrite v with the right singular vectors call stdlib${ii}$_dlacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset( 'L', nr-1,nr-1, zero,zero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_dgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_dlapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_dlaset('A', m-nr,nr, zero,zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_dlaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! The Requested Number Of The Left Singular Vectors ! is then n1 (n or m) ! [[the optimal ratio n/nr for using lq instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio * nr > n ) then call stdlib${ii}$_dlacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset('L', nr-1,nr-1, zero,zero, v(2_${ik}$,1_${ik}$),ldv) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_dlaset('A', n-nr,n, zero,zero, v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_dgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_dlapmt( .false., n, n, v, ldv, iwork ) ! .. now [v] contains the transposed matrix of the right ! singular vectors of a. the leading n left singular vectors ! are in [u](1:n,1:n) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_dlaset('A',m-n,n,zero,zero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_dlaset('A',n,n1-n,zero,zero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_dlaset( 'A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) end if end if else call stdlib${ii}$_dlacpy( 'U', nr, n, a, lda, u(nr+1,1_${ik}$), ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset('L',nr-1,nr-1,zero,zero,u(nr+2,1_${ik}$),ldu) call stdlib${ii}$_dgelqf( nr, n, u(nr+1,1_${ik}$), ldu, work(n+1),work(n+nr+1), lwork-n-& nr, ierr ) call stdlib${ii}$_dlacpy('L',nr,nr,u(nr+1,1_${ik}$),ldu,v,ldv) if ( nr > 1_${ik}$ )call stdlib${ii}$_dlaset('U',nr-1,nr-1,zero,zero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_dgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, work(n+nr+& 1_${ik}$), lwork-n-nr, info ) call stdlib${ii}$_dlaset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_dlaset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) call stdlib${ii}$_dormlq('R','N',n,n,nr,u(nr+1,1_${ik}$),ldu,work(n+1),v, ldv, work(n+& nr+1),lwork-n-nr,ierr) call stdlib${ii}$_dlapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_dlaset('A',m-nr,nr,zero,zero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_dlaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if end if end if ! .. end of the "r**t or r" branch end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not. wntuf )call stdlib${ii}$_dormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& n+1), lwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_dlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) ! ... end of the "full svd" branch end if ! check whether some singular values are returned as zeros, e.g. ! due to underflow, and update the numerical rank. p = nr do q = p, 1, -1 if ( s(q) > zero ) go to 4002 nr = nr - 1_${ik}$ end do 4002 continue ! .. if numerical rank deficiency is detected, the truncated ! singular values are set to zero. if ( nr < n ) call stdlib${ii}$_dlaset( 'G', n-nr,1_${ik}$, zero,zero, s(nr+1), n ) ! .. undo scaling; this may cause overflow in the largest singular ! values. if ( ascaled )call stdlib${ii}$_dlascl( 'G',0_${ik}$,0_${ik}$, one,sqrt(real(m,KIND=dp)), nr,1_${ik}$, s, n, ierr & ) if ( conda ) rwork(1_${ik}$) = sconda rwork(2_${ik}$) = p - nr ! .. p-nr is the number of singular values that are computed as ! exact zeros in stdlib${ii}$_dgesvd() applied to the (possibly truncated) ! full row rank triangular (trapezoidal) factor of a. numrank = nr return end subroutine stdlib${ii}$_dgesvdq #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & !! DGESVDQ: computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N orthogonal matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. numrank, iwork, liwork,work, lwork, rwork, lrwork, info ) use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: joba, jobp, jobr, jobu, jobv integer(${ik}$), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork integer(${ik}$), intent(out) :: numrank, info integer(${ik}$), intent(inout) :: lwork ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: u(ldu,*), v(ldv,*), work(*) real(${rk}$), intent(out) :: s(*), rwork(*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ierr, iwoff, nr, n1, optratio, p, q integer(${ik}$) :: lwcon, lwqp3, lwrk_qgelqf, lwrk_qgesvd, lwrk_qgesvd2, lwrk_qgeqp3, & lwrk_qgeqrf, lwrk_qormlq, lwrk_qormqr, lwrk_qormqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & lworq, lworq2, lworlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr real(${rk}$) :: big, epsln, rtmp, sconda, sfmin ! Local Arrays real(${rk}$) :: rdummy(1_${ik}$) ! Intrinsic Functions ! test the input arguments wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) wntur = stdlib_lsame( jobu, 'R' ) wntua = stdlib_lsame( jobu, 'A' ) wntuf = stdlib_lsame( jobu, 'F' ) lsvc0 = wntus .or. wntur .or. wntua lsvec = lsvc0 .or. wntuf dntwu = stdlib_lsame( jobu, 'N' ) wntvr = stdlib_lsame( jobv, 'R' ) wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) rsvec = wntvr .or. wntva dntwv = stdlib_lsame( jobv, 'N' ) accla = stdlib_lsame( joba, 'A' ) acclm = stdlib_lsame( joba, 'M' ) conda = stdlib_lsame( joba, 'E' ) acclh = stdlib_lsame( joba, 'H' ) .or. conda rowprm = stdlib_lsame( jobp, 'P' ) rtrans = stdlib_lsame( jobr, 'T' ) if ( rowprm ) then if ( conda ) then iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ + n ) else iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ ) end if rminwrk = max( 2_${ik}$, m ) else if ( conda ) then iminwrk = max( 1_${ik}$, n + n ) else iminwrk = max( 1_${ik}$, n ) end if rminwrk = 2_${ik}$ end if lquery = (liwork == -1_${ik}$ .or. lwork == -1_${ik}$ .or. lrwork == -1_${ik}$) info = 0_${ik}$ if ( .not. ( accla .or. acclm .or. acclh ) ) then info = -1_${ik}$ else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then info = -2_${ik}$ else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then info = -3_${ik}$ else if ( .not.( lsvec .or. dntwu ) ) then info = -4_${ik}$ else if ( wntur .and. wntva ) then info = -5_${ik}$ else if ( .not.( rsvec .or. dntwv )) then info = -5_${ik}$ else if ( m<0_${ik}$ ) then info = -6_${ik}$ else if ( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -7_${ik}$ else if ( lda<max( 1_${ik}$, m ) ) then info = -9_${ik}$ else if ( ldu<1_${ik}$ .or. ( lsvc0 .and. ldu<m ) .or.( wntuf .and. ldu<n ) ) then info = -12_${ik}$ else if ( ldv<1_${ik}$ .or. ( rsvec .and. ldv<n ) .or.( conda .and. ldv<n ) ) then info = -14_${ik}$ else if ( liwork < iminwrk .and. .not. lquery ) then info = -17_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lwork are written with a lot of redundancy and ! can be simplified. however, this detailed form is easier for ! maintenance and modifications of the code.]] ! Minimal Workspace Length For Stdlib_Dgeqp3 Of An M X N Matrix lwqp3 = 3_${ik}$ * n + 1_${ik}$ ! Minimal Workspace Length For Stdlib_Dormqr To Build Left Singular Vectors if ( wntus .or. wntur ) then lworq = max( n , 1_${ik}$ ) else if ( wntua ) then lworq = max( m , 1_${ik}$ ) end if ! Minimal Workspace Length For Stdlib_Dpocon Of An N X N Matrix lwcon = 3_${ik}$ * n ! Stdlib_Dgesvd Of An N X N Matrix lwsvd = max( 5_${ik}$ * n, 1_${ik}$ ) if ( lquery ) then call stdlib${ii}$_${ri}$geqp3( m, n, a, lda, iwork, rdummy, rdummy, -1_${ik}$,ierr ) lwrk_qgeqp3 = int( rdummy(1_${ik}$),KIND=${ik}$) if ( wntus .or. wntur ) then call stdlib${ii}$_${ri}$ormqr( 'L', 'N', m, n, n, a, lda, rdummy, u,ldu, rdummy, -1_${ik}$, & ierr ) lwrk_qormqr = int( rdummy(1_${ik}$),KIND=${ik}$) else if ( wntua ) then call stdlib${ii}$_${ri}$ormqr( 'L', 'N', m, m, n, a, lda, rdummy, u,ldu, rdummy, -1_${ik}$, & ierr ) lwrk_qormqr = int( rdummy(1_${ik}$),KIND=${ik}$) else lwrk_qormqr = 0_${ik}$ end if end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ if ( .not. (lsvec .or. rsvec )) then ! Minimal And Optimal Sizes Of The Workspace If ! only the singular values are requested if ( conda ) then minwrk = max( n+lwqp3, lwcon, lwsvd ) else minwrk = max( n+lwqp3, lwsvd ) end if if ( lquery ) then call stdlib${ii}$_${ri}$gesvd( 'N', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_qgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = max( n+lwrk_qgeqp3, n+lwcon, lwrk_qgesvd ) else optwrk = max( n+lwrk_qgeqp3, lwrk_qgesvd ) end if end if else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Workspace If The ! singular values and the left singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd, lworq ) else minwrk = n + max( lwqp3, lwsvd, lworq ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_${ri}$gesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) else call stdlib${ii}$_${ri}$gesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) end if lwrk_qgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_qgeqp3, lwcon, lwrk_qgesvd,lwrk_qormqr ) else optwrk = n + max( lwrk_qgeqp3, lwrk_qgesvd,lwrk_qormqr ) end if end if else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Workspace If The ! singular values and the right singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd ) else minwrk = n + max( lwqp3, lwsvd ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_${ri}$gesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -& 1_${ik}$, ierr ) else call stdlib${ii}$_${ri}$gesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -& 1_${ik}$, ierr ) end if lwrk_qgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_qgeqp3, lwcon, lwrk_qgesvd ) else optwrk = n + max( lwrk_qgeqp3, lwrk_qgesvd ) end if end if else ! Minimal And Optimal Sizes Of The Workspace If The ! full svd is requested if ( rtrans ) then minwrk = max( lwqp3, lwsvd, lworq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n x n/2 stdlib${ii}$_${ri}$geqrf lwqrf = max( n/2_${ik}$, 1_${ik}$ ) ! .. minimal workspace length for n/2 x n/2 stdlib${ii}$_${ri}$gesvd lwsvd2 = max( 5_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lworq2 = max( n, 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwqrf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lworq2, lworq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if else minwrk = max( lwqp3, lwsvd, lworq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n/2 x n stdlib${ii}$_${ri}$gelqf lwlqf = max( n/2_${ik}$, 1_${ik}$ ) lwsvd2 = max( 5_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lworlq = max( n , 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwlqf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lworlq, lworq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_${ri}$gesvd( 'O', 'A', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_qgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_qgeqp3,lwrk_qgesvd,lwrk_qormqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_${ri}$geqrf(n,n/2_${ik}$,u,ldu,rdummy,rdummy,-1_${ik}$,ierr) lwrk_qgeqrf = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gesvd( 'S', 'O', n/2_${ik}$,n/2_${ik}$, v,ldv, s, u,ldu,v, ldv, rdummy,& -1_${ik}$, ierr ) lwrk_qgesvd2 = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormqr( 'R', 'C', n, n, n/2_${ik}$, u, ldu, rdummy,v, ldv, & rdummy, -1_${ik}$, ierr ) lwrk_qormqr2 = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_qgeqp3, n/2_${ik}$+lwrk_qgeqrf,n/2_${ik}$+lwrk_qgesvd2, n/2_${ik}$+& lwrk_qormqr2 ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if else call stdlib${ii}$_${ri}$gesvd( 'S', 'O', n, n, a, lda, s, u, ldu,v, ldv, rdummy, -1_${ik}$, & ierr ) lwrk_qgesvd = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_qgeqp3,lwrk_qgesvd,lwrk_qormqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_${ri}$gelqf(n/2_${ik}$,n,u,ldu,rdummy,rdummy,-1_${ik}$,ierr) lwrk_qgelqf = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gesvd( 'S','O', n/2_${ik}$,n/2_${ik}$, v, ldv, s, u, ldu,v, ldv, rdummy,& -1_${ik}$, ierr ) lwrk_qgesvd2 = int( rdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormlq( 'R', 'N', n, n, n/2_${ik}$, u, ldu, rdummy,v, ldv, rdummy,& -1_${ik}$,ierr ) lwrk_qormlq = int( rdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_qgeqp3, n/2_${ik}$+lwrk_qgelqf,n/2_${ik}$+lwrk_qgesvd2, n/2_${ik}$+& lwrk_qormlq ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if end if end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( 2_${ik}$, optwrk ) if ( lwork < minwrk .and. (.not.lquery) ) info = -19_${ik}$ end if if (info == 0_${ik}$ .and. lrwork < rminwrk .and. .not. lquery) then info = -21_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVDQ', -info ) return else if ( lquery ) then ! return optimal workspace iwork(1_${ik}$) = iminwrk work(1_${ik}$) = optwrk work(2_${ik}$) = minwrk rwork(1_${ik}$) = rminwrk return end if ! quick return if the matrix is void. if( ( m==0_${ik}$ ) .or. ( n==0_${ik}$ ) ) then ! All Output Is Void return end if big = stdlib${ii}$_${ri}$lamch('O') ascaled = .false. iwoff = 1_${ik}$ if ( rowprm ) then iwoff = m ! Reordering The Rows In Decreasing Sequence In The ! ell-infinity norm - this enhances numerical robustness in ! the case of differently scaled rows. do p = 1, m ! rwork(p) = abs( a(p,stdlib${ii}$_izamax(n,a(p,1),lda)) ) ! [[stdlib${ii}$_${ri}$lange will return nan if an entry of the p-th row is nan]] rwork(p) = stdlib${ii}$_${ri}$lange( 'M', 1_${ik}$, n, a(p,1_${ik}$), lda, rdummy ) ! .. check for nan's and inf's if ( ( rwork(p) /= rwork(p) ) .or.( (rwork(p)*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'DGESVDQ', -info ) return end if end do do p = 1, m - 1 q = stdlib${ii}$_i${ri}$amax( m-p+1, rwork(p), 1_${ik}$ ) + p - 1_${ik}$ iwork(n+p) = q if ( p /= q ) then rtmp = rwork(p) rwork(p) = rwork(q) rwork(q) = rtmp end if end do if ( rwork(1_${ik}$) == zero ) then ! quick return: a is the m x n zero matrix. numrank = 0_${ik}$ call stdlib${ii}$_${ri}$laset( 'G', n, 1_${ik}$, zero, zero, s, n ) if ( wntus ) call stdlib${ii}$_${ri}$laset('G', m, n, zero, one, u, ldu) if ( wntua ) call stdlib${ii}$_${ri}$laset('G', m, m, zero, one, u, ldu) if ( wntva ) call stdlib${ii}$_${ri}$laset('G', n, n, zero, one, v, ldv) if ( wntuf ) then call stdlib${ii}$_${ri}$laset( 'G', n, 1_${ik}$, zero, zero, work, n ) call stdlib${ii}$_${ri}$laset( 'G', m, n, zero, one, u, ldu ) end if do p = 1, n iwork(p) = p end do if ( rowprm ) then do p = n + 1, n + m - 1 iwork(p) = p - n end do end if if ( conda ) rwork(1_${ik}$) = -1_${ik}$ rwork(2_${ik}$) = -1_${ik}$ return end if if ( rwork(1_${ik}$) > big / sqrt(real(m,KIND=${rk}$)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_${ri}$lascl('G',0_${ik}$,0_${ik}$,sqrt(real(m,KIND=${rk}$)),one, m,n, a,lda, ierr) ascaled = .true. end if call stdlib${ii}$_${ri}$laswp( n, a, lda, 1_${ik}$, m-1, iwork(n+1), 1_${ik}$ ) end if ! .. at this stage, preemptive scaling is done only to avoid column ! norms overflows during the qr factorization. the svd procedure should ! have its own scaling to save the singular values from overflows and ! underflows. that depends on the svd procedure. if ( .not.rowprm ) then rtmp = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, rdummy ) if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'DGESVDQ', -info ) return end if if ( rtmp > big / sqrt(real(m,KIND=${rk}$)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_${ri}$lascl('G',0_${ik}$,0_${ik}$, sqrt(real(m,KIND=${rk}$)),one, m,n, a,lda, ierr) ascaled = .true. end if end if ! Qr Factorization With Column Pivoting ! a * p = q * [ r ] ! [ 0 ] do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_${ri}$geqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,ierr ) ! if the user requested accuracy level allows truncation in the ! computed upper triangular factor, the matrix r is examined and, ! if possible, replaced with its leading upper trapezoidal part. epsln = stdlib${ii}$_${ri}$lamch('E') sfmin = stdlib${ii}$_${ri}$lamch('S') ! small = sfmin / epsln nr = n if ( accla ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*eps*||a||_f are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*eps*||a||_f. nr = 1_${ik}$ rtmp = sqrt(real(n,KIND=${rk}$))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) exit loop_3002 nr = nr + 1_${ik}$ end do loop_3002 elseif ( acclm ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r is used as the criterion for being ! close-to-rank-deficient. the threshold is set to epsln=stdlib${ii}$_${ri}$lamch('e'). ! [[this can be made more flexible by replacing this hard-coded value ! with a user specified threshold.]] also, the values that underflow ! will be truncated. nr = 1_${ik}$ loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! Rrqr Not Authorized To Determine Numerical Rank Except In The ! obvious case of zero pivots. ! .. inspect r for exact zeros on the diagonal; ! r(i,i)=0 => r(i:n,i:n)=0. nr = 1_${ik}$ loop_3502: do p = 2, n if ( abs(a(p,p)) == zero ) exit loop_3502 nr = nr + 1_${ik}$ end do loop_3502 if ( conda ) then ! estimate the scaled condition number of a. use the fact that it is ! the same as the scaled condition number of r. ! V Is Used As Workspace call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, v, ldv ) ! only the leading nr x nr submatrix of the triangular factor ! is considered. only if nr=n will this give a reliable error ! bound. however, even for nr < n, this can be used on an ! expert level and obtain useful information in the sense of ! perturbation theory. do p = 1, nr rtmp = stdlib${ii}$_${ri}$nrm2( p, v(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ri}$scal( p, one/rtmp, v(1_${ik}$,p), 1_${ik}$ ) end do if ( .not. ( lsvec .or. rsvec ) ) then call stdlib${ii}$_${ri}$pocon( 'U', nr, v, ldv, one, rtmp,work, iwork(n+iwoff), ierr & ) else call stdlib${ii}$_${ri}$pocon( 'U', nr, v, ldv, one, rtmp,work(n+1), iwork(n+iwoff), & ierr ) end if sconda = one / sqrt(rtmp) ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda ! see the reference [1] for more details. end if endif if ( wntur ) then n1 = nr else if ( wntus .or. wntuf) then n1 = n else if ( wntua ) then n1 = m end if if ( .not. ( rsvec .or. lsvec ) ) then ! ....................................................................... ! Only The Singular Values Are Requested ! ....................................................................... if ( rtrans ) then ! .. compute the singular values of r**t = [a](1:nr,1:n)**t ! .. set the lower triangle of [a] to [a](1:nr,1:n)**t and ! the upper triangle of [a] to zero. do p = 1, min( n, nr ) do q = p + 1, n a(q,p) = a(p,q) if ( q <= nr ) a(p,q) = zero end do end do call stdlib${ii}$_${ri}$gesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, work, lwork, info & ) else ! .. compute the singular values of r = [a](1:nr,1:n) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', nr-1,nr-1, zero,zero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_${ri}$gesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, work, lwork, info & ) end if else if ( lsvec .and. ( .not. rsvec) ) then ! ....................................................................... ! The Singular Values And The Left Singular Vectors Requested ! ......................................................................."""""""" if ( rtrans ) then ! .. apply stdlib${ii}$_${ri}$gesvd to r**t ! .. copy r**t into [u] and overwrite [u] with the right singular ! vectors of r do p = 1, nr do q = p, n u(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'U', nr-1,nr-1, zero,zero, u(1_${ik}$,2_${ik}$), ldu ) ! .. the left singular vectors not computed, the nr right singular ! vectors overwrite [u](1:nr,1:nr) as transposed. these ! will be pre-multiplied by q to build the left singular vectors of a. call stdlib${ii}$_${ri}$gesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, nr do q = p + 1, nr rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do else ! Apply Stdlib_Dgesvd To R ! .. copy r into [u] and overwrite [u] with the left singular vectors call stdlib${ii}$_${ri}$lacpy( 'U', nr, n, a, lda, u, ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', nr-1, nr-1, zero, zero, u(2_${ik}$,1_${ik}$), ldu ) ! .. the right singular vectors not computed, the nr left singular ! vectors overwrite [u](1:nr,1:nr) call stdlib${ii}$_${ri}$gesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of ! r. these will be pre-multiplied by q to build the left singular ! vectors of a. end if ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. ( .not.wntuf ) ) then call stdlib${ii}$_${ri}$laset('A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset( 'A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not.wntuf )call stdlib${ii}$_${ri}$ormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& n+1), lwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_${ri}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) else if ( rsvec .and. ( .not. lsvec ) ) then ! ....................................................................... ! The Singular Values And The Right Singular Vectors Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_${ri}$gesvd to r**t ! .. copy r**t into v and overwrite v with the left singular vectors do p = 1, nr do q = p, n v(q,p) = (a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**t overwrite v, the right singular ! vectors not computed if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_${ri}$gesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, nr do q = p + 1, nr rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do if ( nr < n ) then do p = 1, nr do q = nr + 1, n v(p,q) = v(q,p) end do end do end if call stdlib${ii}$_${ri}$lapmt( .false., nr, n, v, ldv, iwork ) else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:n,1:nr) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the qr factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_${ri}$laset('G', n, n-nr, zero, zero, v(1_${ik}$,nr+1), ldv) call stdlib${ii}$_${ri}$gesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, work(n+1), & lwork-n, info ) do p = 1, n do q = p + 1, n rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do call stdlib${ii}$_${ri}$lapmt( .false., n, n, v, ldv, iwork ) end if else ! Aply Stdlib_Dgesvd To R ! Copy R Into V And Overwrite V With The Right Singular Vectors call stdlib${ii}$_${ri}$lacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', nr-1, nr-1, zero, zero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors overwrite v, the nr left singular ! vectors stored in u(1:nr,1:nr) if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_${ri}$gesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_${ri}$lapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:nr,1:n) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the lq factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_${ri}$laset('G', n-nr, n, zero,zero, v(nr+1,1_${ik}$), ldv) call stdlib${ii}$_${ri}$gesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_${ri}$lapmt( .false., n, n, v, ldv, iwork ) end if ! .. now [v] contains the transposed matrix of the right singular ! vectors of a. end if else ! ....................................................................... ! Full Svd Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_${ri}$gesvd to r**t [[this option is left for r if ( wntvr .or. ( nr == n ) ) then ! .. copy r**t into [v] and overwrite [v] with the left singular ! vectors of r**t do p = 1, nr do q = p, n v(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**t overwrite [v], the nr right ! singular vectors of r**t stored in [u](1:nr,1:nr) as transposed call stdlib${ii}$_${ri}$gesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, work(n+1), & lwork-n, info ) ! Assemble V do p = 1, nr do q = p + 1, nr rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do if ( nr < n ) then do p = 1, nr do q = nr+1, n v(p,q) = v(q,p) end do end do end if call stdlib${ii}$_${ri}$lapmt( .false., nr, n, v, ldv, iwork ) do p = 1, nr do q = p + 1, nr rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ri}$laset('A', m-nr,nr, zero,zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! .. copy r**t into [v] and overwrite [v] with the left singular ! vectors of r**t ! [[the optimal ratio n/nr for using qrf instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio*nr > n ) then do p = 1, nr do q = p, n v(q,p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset('U',nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_${ri}$laset('A',n,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ri}$gesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, work(n+1), & lwork-n, info ) do p = 1, n do q = p + 1, n rtmp = v(q,p) v(q,p) = v(p,q) v(p,q) = rtmp end do end do call stdlib${ii}$_${ri}$lapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). do p = 1, n do q = p + 1, n rtmp = u(q,p) u(q,p) = u(p,q) u(p,q) = rtmp end do end do if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ri}$laset('A',m-n,n,zero,zero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_${ri}$laset('A',n,n1-n,zero,zero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_${ri}$laset('A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) end if end if else ! .. copy r**t into [u] and overwrite [u] with the right ! singular vectors of r do p = 1, nr do q = p, n u(q,nr+p) = a(p,q) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset('U',nr-1,nr-1,zero,zero,u(1_${ik}$,nr+2),ldu) call stdlib${ii}$_${ri}$geqrf( n, nr, u(1_${ik}$,nr+1), ldu, work(n+1),work(n+nr+1), lwork-& n-nr, ierr ) do p = 1, nr do q = 1, n v(q,p) = u(p,nr+q) end do end do if (nr>1_${ik}$) call stdlib${ii}$_${ri}$laset('U',nr-1,nr-1,zero,zero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_${ri}$gesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, work(n+nr+1)& ,lwork-n-nr, info ) call stdlib${ii}$_${ri}$laset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ri}$laset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ri}$laset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) call stdlib${ii}$_${ri}$ormqr('R','C', n, n, nr, u(1_${ik}$,nr+1), ldu,work(n+1),v,ldv,work(& n+nr+1),lwork-n-nr,ierr) call stdlib${ii}$_${ri}$lapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ri}$laset('A',m-nr,nr,zero,zero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) end if end if end if end if else ! .. apply stdlib${ii}$_${ri}$gesvd to r [[this is the recommended option]] if ( wntvr .or. ( nr == n ) ) then ! .. copy r into [v] and overwrite v with the right singular vectors call stdlib${ii}$_${ri}$lacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset( 'L', nr-1,nr-1, zero,zero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_${ri}$gesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_${ri}$lapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**t ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ri}$laset('A', m-nr,nr, zero,zero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! The Requested Number Of The Left Singular Vectors ! is then n1 (n or m) ! [[the optimal ratio n/nr for using lq instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'dgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio * nr > n ) then call stdlib${ii}$_${ri}$lacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset('L', nr-1,nr-1, zero,zero, v(2_${ik}$,1_${ik}$),ldv) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_${ri}$laset('A', n-nr,n, zero,zero, v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ri}$gesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, work(n+1), & lwork-n, info ) call stdlib${ii}$_${ri}$lapmt( .false., n, n, v, ldv, iwork ) ! .. now [v] contains the transposed matrix of the right ! singular vectors of a. the leading n left singular vectors ! are in [u](1:n,1:n) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ri}$laset('A',m-n,n,zero,zero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_${ri}$laset('A',n,n1-n,zero,zero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_${ri}$laset( 'A',m-n,n1-n,zero,one,u(n+1,n+1), ldu ) end if end if else call stdlib${ii}$_${ri}$lacpy( 'U', nr, n, a, lda, u(nr+1,1_${ik}$), ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset('L',nr-1,nr-1,zero,zero,u(nr+2,1_${ik}$),ldu) call stdlib${ii}$_${ri}$gelqf( nr, n, u(nr+1,1_${ik}$), ldu, work(n+1),work(n+nr+1), lwork-n-& nr, ierr ) call stdlib${ii}$_${ri}$lacpy('L',nr,nr,u(nr+1,1_${ik}$),ldu,v,ldv) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ri}$laset('U',nr-1,nr-1,zero,zero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_${ri}$gesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, work(n+nr+& 1_${ik}$), lwork-n-nr, info ) call stdlib${ii}$_${ri}$laset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ri}$laset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ri}$laset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) call stdlib${ii}$_${ri}$ormlq('R','N',n,n,nr,u(nr+1,1_${ik}$),ldu,work(n+1),v, ldv, work(n+& nr+1),lwork-n-nr,ierr) call stdlib${ii}$_${ri}$lapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ri}$laset('A',m-nr,nr,zero,zero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if end if end if ! .. end of the "r**t or r" branch end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not. wntuf )call stdlib${ii}$_${ri}$ormqr( 'L', 'N', m, n1, n, a, lda, work, u,ldu, work(& n+1), lwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_${ri}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) ! ... end of the "full svd" branch end if ! check whether some singular values are returned as zeros, e.g. ! due to underflow, and update the numerical rank. p = nr do q = p, 1, -1 if ( s(q) > zero ) go to 4002 nr = nr - 1_${ik}$ end do 4002 continue ! .. if numerical rank deficiency is detected, the truncated ! singular values are set to zero. if ( nr < n ) call stdlib${ii}$_${ri}$laset( 'G', n-nr,1_${ik}$, zero,zero, s(nr+1), n ) ! .. undo scaling; this may cause overflow in the largest singular ! values. if ( ascaled )call stdlib${ii}$_${ri}$lascl( 'G',0_${ik}$,0_${ik}$, one,sqrt(real(m,KIND=${rk}$)), nr,1_${ik}$, s, n, ierr & ) if ( conda ) rwork(1_${ik}$) = sconda rwork(2_${ik}$) = p - nr ! .. p-nr is the number of singular values that are computed as ! exact zeros in stdlib${ii}$_${ri}$gesvd() applied to the (possibly truncated) ! full row rank triangular (trapezoidal) factor of a. numrank = nr return end subroutine stdlib${ii}$_${ri}$gesvdq #:endif #:endfor module subroutine stdlib${ii}$_cgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & !! CGESVDQ computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N unitary matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: joba, jobp, jobr, jobu, jobv integer(${ik}$), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork integer(${ik}$), intent(out) :: numrank, info integer(${ik}$), intent(inout) :: lcwork ! Array Arguments complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: u(ldu,*), v(ldv,*), cwork(*) real(sp), intent(out) :: s(*), rwork(*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ierr, nr, n1, optratio, p, q integer(${ik}$) :: lwcon, lwqp3, lwrk_cgelqf, lwrk_cgesvd, lwrk_cgesvd2, lwrk_cgeqp3, & lwrk_cgeqrf, lwrk_cunmlq, lwrk_cunmqr, lwrk_cunmqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & lwunq, lwunq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr real(sp) :: big, epsln, rtmp, sconda, sfmin complex(sp) :: ctmp ! Local Arrays complex(sp) :: cdummy(1_${ik}$) real(sp) :: rdummy(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) wntur = stdlib_lsame( jobu, 'R' ) wntua = stdlib_lsame( jobu, 'A' ) wntuf = stdlib_lsame( jobu, 'F' ) lsvc0 = wntus .or. wntur .or. wntua lsvec = lsvc0 .or. wntuf dntwu = stdlib_lsame( jobu, 'N' ) wntvr = stdlib_lsame( jobv, 'R' ) wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) rsvec = wntvr .or. wntva dntwv = stdlib_lsame( jobv, 'N' ) accla = stdlib_lsame( joba, 'A' ) acclm = stdlib_lsame( joba, 'M' ) conda = stdlib_lsame( joba, 'E' ) acclh = stdlib_lsame( joba, 'H' ) .or. conda rowprm = stdlib_lsame( jobp, 'P' ) rtrans = stdlib_lsame( jobr, 'T' ) if ( rowprm ) then iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ ) rminwrk = max( 2_${ik}$, m, 5_${ik}$*n ) else iminwrk = max( 1_${ik}$, n ) rminwrk = max( 2_${ik}$, 5_${ik}$*n ) end if lquery = (liwork == -1_${ik}$ .or. lcwork == -1_${ik}$ .or. lrwork == -1_${ik}$) info = 0_${ik}$ if ( .not. ( accla .or. acclm .or. acclh ) ) then info = -1_${ik}$ else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then info = -2_${ik}$ else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then info = -3_${ik}$ else if ( .not.( lsvec .or. dntwu ) ) then info = -4_${ik}$ else if ( wntur .and. wntva ) then info = -5_${ik}$ else if ( .not.( rsvec .or. dntwv )) then info = -5_${ik}$ else if ( m<0_${ik}$ ) then info = -6_${ik}$ else if ( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -7_${ik}$ else if ( lda<max( 1_${ik}$, m ) ) then info = -9_${ik}$ else if ( ldu<1_${ik}$ .or. ( lsvc0 .and. ldu<m ) .or.( wntuf .and. ldu<n ) ) then info = -12_${ik}$ else if ( ldv<1_${ik}$ .or. ( rsvec .and. ldv<n ) .or.( conda .and. ldv<n ) ) then info = -14_${ik}$ else if ( liwork < iminwrk .and. .not. lquery ) then info = -17_${ik}$ end if if ( info == 0_${ik}$ ) then ! compute workspace ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lcwork are written with a lot of redundancy and ! can be simplified. however, this detailed form is easier for ! maintenance and modifications of the code.]] ! Minimal Workspace Length For Stdlib_Cgeqp3 Of An M X N Matrix lwqp3 = n+1 ! Minimal Workspace Length For Stdlib_Cunmqr To Build Left Singular Vectors if ( wntus .or. wntur ) then lwunq = max( n , 1_${ik}$ ) else if ( wntua ) then lwunq = max( m , 1_${ik}$ ) end if ! Minimal Workspace Length For Stdlib_Cpocon Of An N X N Matrix lwcon = 2_${ik}$ * n ! Stdlib_Cgesvd Of An N X N Matrix lwsvd = max( 3_${ik}$ * n, 1_${ik}$ ) if ( lquery ) then call stdlib${ii}$_cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1_${ik}$,rdummy, ierr ) lwrk_cgeqp3 = int( cdummy(1_${ik}$),KIND=${ik}$) if ( wntus .or. wntur ) then call stdlib${ii}$_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_cunmqr = int( cdummy(1_${ik}$),KIND=${ik}$) else if ( wntua ) then call stdlib${ii}$_cunmqr( 'L', 'N', m, m, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_cunmqr = int( cdummy(1_${ik}$),KIND=${ik}$) else lwrk_cunmqr = 0_${ik}$ end if end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ if ( .not. (lsvec .or. rsvec )) then ! Minimal And Optimal Sizes Of The Complex Workspace If ! only the singular values are requested if ( conda ) then minwrk = max( n+lwqp3, lwcon, lwsvd ) else minwrk = max( n+lwqp3, lwsvd ) end if if ( lquery ) then call stdlib${ii}$_cgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_cgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = max( n+lwrk_cgeqp3, n+lwcon, lwrk_cgesvd ) else optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvd ) end if end if else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the left singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd, lwunq ) else minwrk = n + max( lwqp3, lwsvd, lwunq ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_cgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) else call stdlib${ii}$_cgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) end if lwrk_cgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_cgeqp3, lwcon, lwrk_cgesvd,lwrk_cunmqr ) else optwrk = n + max( lwrk_cgeqp3, lwrk_cgesvd,lwrk_cunmqr ) end if end if else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the right singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd ) else minwrk = n + max( lwqp3, lwsvd ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_cgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -& 1_${ik}$, rdummy, ierr ) else call stdlib${ii}$_cgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -& 1_${ik}$, rdummy, ierr ) end if lwrk_cgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_cgeqp3, lwcon, lwrk_cgesvd ) else optwrk = n + max( lwrk_cgeqp3, lwrk_cgesvd ) end if end if else ! Minimal And Optimal Sizes Of The Complex Workspace If The ! full svd is requested if ( rtrans ) then minwrk = max( lwqp3, lwsvd, lwunq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n x n/2 stdlib${ii}$_cgeqrf lwqrf = max( n/2_${ik}$, 1_${ik}$ ) ! .. minimal workspace length for n/2 x n/2 stdlib${ii}$_cgesvd lwsvd2 = max( 3_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunq2 = max( n, 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwqrf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunq2, lwunq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if else minwrk = max( lwqp3, lwsvd, lwunq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n/2 x n stdlib${ii}$_cgelqf lwlqf = max( n/2_${ik}$, 1_${ik}$ ) lwsvd2 = max( 3_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunlq = max( n , 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwlqf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunlq, lwunq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_cgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_cgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_cgeqp3,lwrk_cgesvd,lwrk_cunmqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_cgeqrf(n,n/2_${ik}$,u,ldu,cdummy,cdummy,-1_${ik}$,ierr) lwrk_cgeqrf = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cgesvd( 'S', 'O', n/2_${ik}$,n/2_${ik}$, v,ldv, s, u,ldu,v, ldv, cdummy,& -1_${ik}$, rdummy, ierr ) lwrk_cgesvd2 = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmqr( 'R', 'C', n, n, n/2_${ik}$, u, ldu, cdummy,v, ldv, & cdummy, -1_${ik}$, ierr ) lwrk_cunmqr2 = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_cgeqp3, n/2_${ik}$+lwrk_cgeqrf,n/2_${ik}$+lwrk_cgesvd2, n/2_${ik}$+& lwrk_cunmqr2 ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if else call stdlib${ii}$_cgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_cgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_cgeqp3,lwrk_cgesvd,lwrk_cunmqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_cgelqf(n/2_${ik}$,n,u,ldu,cdummy,cdummy,-1_${ik}$,ierr) lwrk_cgelqf = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cgesvd( 'S','O', n/2_${ik}$,n/2_${ik}$, v, ldv, s, u, ldu,v, ldv, cdummy,& -1_${ik}$, rdummy, ierr ) lwrk_cgesvd2 = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmlq( 'R', 'N', n, n, n/2_${ik}$, u, ldu, cdummy,v, ldv, cdummy,& -1_${ik}$,ierr ) lwrk_cunmlq = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_cgeqp3, n/2_${ik}$+lwrk_cgelqf,n/2_${ik}$+lwrk_cgesvd2, n/2_${ik}$+& lwrk_cunmlq ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if end if end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( 2_${ik}$, optwrk ) if ( lcwork < minwrk .and. (.not.lquery) ) info = -19_${ik}$ end if if (info == 0_${ik}$ .and. lrwork < rminwrk .and. .not. lquery) then info = -21_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGESVDQ', -info ) return else if ( lquery ) then ! return optimal workspace iwork(1_${ik}$) = iminwrk cwork(1_${ik}$) = optwrk cwork(2_${ik}$) = minwrk rwork(1_${ik}$) = rminwrk return end if ! quick return if the matrix is void. if( ( m==0_${ik}$ ) .or. ( n==0_${ik}$ ) ) then ! All Output Is Void return end if big = stdlib${ii}$_slamch('O') ascaled = .false. if ( rowprm ) then ! Reordering The Rows In Decreasing Sequence In The ! ell-infinity norm - this enhances numerical robustness in ! the case of differently scaled rows. do p = 1, m ! rwork(p) = abs( a(p,stdlib${ii}$_icamax(n,a(p,1),lda)) ) ! [[stdlib${ii}$_clange will return nan if an entry of the p-th row is nan]] rwork(p) = stdlib${ii}$_clange( 'M', 1_${ik}$, n, a(p,1_${ik}$), lda, rdummy ) ! .. check for nan's and inf's if ( ( rwork(p) /= rwork(p) ) .or.( (rwork(p)*zero) /= zero ) ) then info = - 8_${ik}$ call stdlib${ii}$_xerbla( 'CGESVDQ', -info ) return end if end do do p = 1, m - 1 q = stdlib${ii}$_isamax( m-p+1, rwork(p), 1_${ik}$ ) + p - 1_${ik}$ iwork(n+p) = q if ( p /= q ) then rtmp = rwork(p) rwork(p) = rwork(q) rwork(q) = rtmp end if end do if ( rwork(1_${ik}$) == zero ) then ! quick return: a is the m x n zero matrix. numrank = 0_${ik}$ call stdlib${ii}$_slaset( 'G', n, 1_${ik}$, zero, zero, s, n ) if ( wntus ) call stdlib${ii}$_claset('G', m, n, czero, cone, u, ldu) if ( wntua ) call stdlib${ii}$_claset('G', m, m, czero, cone, u, ldu) if ( wntva ) call stdlib${ii}$_claset('G', n, n, czero, cone, v, ldv) if ( wntuf ) then call stdlib${ii}$_claset( 'G', n, 1_${ik}$, czero, czero, cwork, n ) call stdlib${ii}$_claset( 'G', m, n, czero, cone, u, ldu ) end if do p = 1, n iwork(p) = p end do if ( rowprm ) then do p = n + 1, n + m - 1 iwork(p) = p - n end do end if if ( conda ) rwork(1_${ik}$) = -1_${ik}$ rwork(2_${ik}$) = -1_${ik}$ return end if if ( rwork(1_${ik}$) > big / sqrt(real(m,KIND=sp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_clascl('G',0_${ik}$,0_${ik}$,sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) ascaled = .true. end if call stdlib${ii}$_claswp( n, a, lda, 1_${ik}$, m-1, iwork(n+1), 1_${ik}$ ) end if ! .. at this stage, preemptive scaling is done only to avoid column ! norms overflows during the qr factorization. the svd procedure should ! have its own scaling to save the singular values from overflows and ! underflows. that depends on the svd procedure. if ( .not.rowprm ) then rtmp = stdlib${ii}$_clange( 'M', m, n, a, lda, rwork ) if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then info = - 8_${ik}$ call stdlib${ii}$_xerbla( 'CGESVDQ', -info ) return end if if ( rtmp > big / sqrt(real(m,KIND=sp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_clascl('G',0_${ik}$,0_${ik}$, sqrt(real(m,KIND=sp)),one, m,n, a,lda, ierr) ascaled = .true. end if end if ! Qr Factorization With Column Pivoting ! a * p = q * [ r ] ! [ 0 ] do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,rwork, ierr ) ! if the user requested accuracy level allows truncation in the ! computed upper triangular factor, the matrix r is examined and, ! if possible, replaced with its leading upper trapezoidal part. epsln = stdlib${ii}$_slamch('E') sfmin = stdlib${ii}$_slamch('S') ! small = sfmin / epsln nr = n if ( accla ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*eps*||a||_f are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*eps*||a||_f. nr = 1_${ik}$ rtmp = sqrt(real(n,KIND=sp))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) exit loop_3002 nr = nr + 1_${ik}$ end do loop_3002 elseif ( acclm ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r is used as the criterion for being ! close-to-rank-deficient. the threshold is set to epsln=stdlib${ii}$_slamch('e'). ! [[this can be made more flexible by replacing this hard-coded value ! with a user specified threshold.]] also, the values that underflow ! will be truncated. nr = 1_${ik}$ loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! Rrqr Not Authorized To Determine Numerical Rank Except In The ! obvious case of zero pivots. ! .. inspect r for exact zeros on the diagonal; ! r(i,i)=0 => r(i:n,i:n)=0. nr = 1_${ik}$ loop_3502: do p = 2, n if ( abs(a(p,p)) == zero ) exit loop_3502 nr = nr + 1_${ik}$ end do loop_3502 if ( conda ) then ! estimate the scaled condition number of a. use the fact that it is ! the same as the scaled condition number of r. ! V Is Used As Workspace call stdlib${ii}$_clacpy( 'U', n, n, a, lda, v, ldv ) ! only the leading nr x nr submatrix of the triangular factor ! is considered. only if nr=n will this give a reliable error ! bound. however, even for nr < n, this can be used on an ! expert level and obtain useful information in the sense of ! perturbation theory. do p = 1, nr rtmp = stdlib${ii}$_scnrm2( p, v(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_csscal( p, one/rtmp, v(1_${ik}$,p), 1_${ik}$ ) end do if ( .not. ( lsvec .or. rsvec ) ) then call stdlib${ii}$_cpocon( 'U', nr, v, ldv, one, rtmp,cwork, rwork, ierr ) else call stdlib${ii}$_cpocon( 'U', nr, v, ldv, one, rtmp,cwork(n+1), rwork, ierr ) end if sconda = one / sqrt(rtmp) ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda ! see the reference [1] for more details. end if endif if ( wntur ) then n1 = nr else if ( wntus .or. wntuf) then n1 = n else if ( wntua ) then n1 = m end if if ( .not. ( rsvec .or. lsvec ) ) then ! ....................................................................... ! Only The Singular Values Are Requested ! ....................................................................... if ( rtrans ) then ! .. compute the singular values of r**h = [a](1:nr,1:n)**h ! .. set the lower triangle of [a] to [a](1:nr,1:n)**h and ! the upper triangle of [a] to zero. do p = 1, min( n, nr ) a(p,p) = conjg(a(p,p)) do q = p + 1, n a(q,p) = conjg(a(p,q)) if ( q <= nr ) a(p,q) = czero end do end do call stdlib${ii}$_cgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & rwork, info ) else ! .. compute the singular values of r = [a](1:nr,1:n) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'L', nr-1,nr-1, czero,czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_cgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & rwork, info ) end if else if ( lsvec .and. ( .not. rsvec) ) then ! ....................................................................... ! The Singular Values And The Left Singular Vectors Requested ! ......................................................................."""""""" if ( rtrans ) then ! .. apply stdlib${ii}$_cgesvd to r**h ! .. copy r**h into [u] and overwrite [u] with the right singular ! vectors of r do p = 1, nr do q = p, n u(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero,czero, u(1_${ik}$,2_${ik}$), ldu ) ! .. the left singular vectors not computed, the nr right singular ! vectors overwrite [u](1:nr,1:nr) as conjugate transposed. these ! will be pre-multiplied by q to build the left singular vectors of a. call stdlib${ii}$_cgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, nr u(p,p) = conjg(u(p,p)) do q = p + 1, nr ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do else ! Apply Stdlib_Cgesvd To R ! .. copy r into [u] and overwrite [u] with the left singular vectors call stdlib${ii}$_clacpy( 'U', nr, n, a, lda, u, ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'L', nr-1, nr-1, czero, czero, u(2_${ik}$,1_${ik}$), ldu ) ! .. the right singular vectors not computed, the nr left singular ! vectors overwrite [u](1:nr,1:nr) call stdlib${ii}$_cgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of ! r. these will be pre-multiplied by q to build the left singular ! vectors of a. end if ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. ( .not.wntuf ) ) then call stdlib${ii}$_claset('A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_claset( 'A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not.wntuf )call stdlib${ii}$_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & cwork(n+1), lcwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_claswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) else if ( rsvec .and. ( .not. lsvec ) ) then ! ....................................................................... ! The Singular Values And The Right Singular Vectors Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_cgesvd to r**h ! .. copy r**h into v and overwrite v with the left singular vectors do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**h overwrite v, the right singular ! vectors not computed if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_cgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, nr v(p,p) = conjg(v(p,p)) do q = p + 1, nr ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do if ( nr < n ) then do p = 1, nr do q = nr + 1, n v(p,q) = conjg(v(q,p)) end do end do end if call stdlib${ii}$_clapmt( .false., nr, n, v, ldv, iwork ) else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:n,1:nr) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the qr factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_claset('G', n, n-nr, czero, czero, v(1_${ik}$,nr+1), ldv) call stdlib${ii}$_cgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, n v(p,p) = conjg(v(p,p)) do q = p + 1, n ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do call stdlib${ii}$_clapmt( .false., n, n, v, ldv, iwork ) end if else ! Aply Stdlib_Cgesvd To R ! Copy R Into V And Overwrite V With The Right Singular Vectors call stdlib${ii}$_clacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'L', nr-1, nr-1, czero, czero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors overwrite v, the nr left singular ! vectors stored in u(1:nr,1:nr) if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_cgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_clapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:nr,1:n) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the lq factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_claset('G', n-nr, n, czero,czero, v(nr+1,1_${ik}$), ldv) call stdlib${ii}$_cgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_clapmt( .false., n, n, v, ldv, iwork ) end if ! .. now [v] contains the adjoint of the matrix of the right singular ! vectors of a. end if else ! ....................................................................... ! Full Svd Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_cgesvd to r**h [[this option is left for r if ( wntvr .or. ( nr == n ) ) then ! .. copy r**h into [v] and overwrite [v] with the left singular ! vectors of r**h do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**h overwrite [v], the nr right ! singular vectors of r**h stored in [u](1:nr,1:nr) as conjugate ! transposed call stdlib${ii}$_cgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) ! Assemble V do p = 1, nr v(p,p) = conjg(v(p,p)) do q = p + 1, nr ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do if ( nr < n ) then do p = 1, nr do q = nr+1, n v(p,q) = conjg(v(q,p)) end do end do end if call stdlib${ii}$_clapmt( .false., nr, n, v, ldv, iwork ) do p = 1, nr u(p,p) = conjg(u(p,p)) do q = p + 1, nr ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_claset('A', m-nr,nr, czero,czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_claset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! .. copy r**h into [v] and overwrite [v] with the left singular ! vectors of r**h ! [[the optimal ratio n/nr for using qrf instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'cgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio*nr > n ) then do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_claset('U',nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_claset('A',n,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_cgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, n v(p,p) = conjg(v(p,p)) do q = p + 1, n ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do call stdlib${ii}$_clapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). do p = 1, n u(p,p) = conjg(u(p,p)) do q = p + 1, n ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_claset('A',m-n,n,czero,czero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_claset('A',n,n1-n,czero,czero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_claset('A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) end if end if else ! .. copy r**h into [u] and overwrite [u] with the right ! singular vectors of r do p = 1, nr do q = p, n u(q,nr+p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_claset('U',nr-1,nr-1,czero,czero,u(1_${ik}$,nr+2),ldu) call stdlib${ii}$_cgeqrf( n, nr, u(1_${ik}$,nr+1), ldu, cwork(n+1),cwork(n+nr+1), & lcwork-n-nr, ierr ) do p = 1, nr do q = 1, n v(q,p) = conjg(u(p,nr+q)) end do end do if (nr>1_${ik}$) call stdlib${ii}$_claset('U',nr-1,nr-1,czero,czero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_cgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, cwork(n+nr+& 1_${ik}$),lcwork-n-nr,rwork, info ) call stdlib${ii}$_claset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_claset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) call stdlib${ii}$_cunmqr('R','C', n, n, nr, u(1_${ik}$,nr+1), ldu,cwork(n+1),v,ldv,& cwork(n+nr+1),lcwork-n-nr,ierr) call stdlib${ii}$_clapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_claset('A',m-nr,nr,czero,czero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_claset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) end if end if end if end if else ! .. apply stdlib${ii}$_cgesvd to r [[this is the recommended option]] if ( wntvr .or. ( nr == n ) ) then ! .. copy r into [v] and overwrite v with the right singular vectors call stdlib${ii}$_clacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset( 'L', nr-1,nr-1, czero,czero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_cgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_clapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_claset('A', m-nr,nr, czero,czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_claset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! The Requested Number Of The Left Singular Vectors ! is then n1 (n or m) ! [[the optimal ratio n/nr for using lq instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'cgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio * nr > n ) then call stdlib${ii}$_clacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset('L', nr-1,nr-1, czero,czero, v(2_${ik}$,1_${ik}$),ldv) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_claset('A', n-nr,n, czero,czero, v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_cgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_clapmt( .false., n, n, v, ldv, iwork ) ! .. now [v] contains the adjoint of the matrix of the right ! singular vectors of a. the leading n left singular vectors ! are in [u](1:n,1:n) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_claset('A',m-n,n,czero,czero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_claset('A',n,n1-n,czero,czero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_claset( 'A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) end if end if else call stdlib${ii}$_clacpy( 'U', nr, n, a, lda, u(nr+1,1_${ik}$), ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset('L',nr-1,nr-1,czero,czero,u(nr+2,1_${ik}$),ldu) call stdlib${ii}$_cgelqf( nr, n, u(nr+1,1_${ik}$), ldu, cwork(n+1),cwork(n+nr+1), & lcwork-n-nr, ierr ) call stdlib${ii}$_clacpy('L',nr,nr,u(nr+1,1_${ik}$),ldu,v,ldv) if ( nr > 1_${ik}$ )call stdlib${ii}$_claset('U',nr-1,nr-1,czero,czero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_cgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, cwork(n+nr+& 1_${ik}$), lcwork-n-nr, rwork, info ) call stdlib${ii}$_claset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_claset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) call stdlib${ii}$_cunmlq('R','N',n,n,nr,u(nr+1,1_${ik}$),ldu,cwork(n+1),v, ldv, cwork(n+& nr+1),lcwork-n-nr,ierr) call stdlib${ii}$_clapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_claset('A',m-nr,nr,czero,czero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_claset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if end if end if ! .. end of the "r**h or r" branch end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not. wntuf )call stdlib${ii}$_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & cwork(n+1), lcwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_claswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) ! ... end of the "full svd" branch end if ! check whether some singular values are returned as zeros, e.g. ! due to underflow, and update the numerical rank. p = nr do q = p, 1, -1 if ( s(q) > zero ) go to 4002 nr = nr - 1_${ik}$ end do 4002 continue ! .. if numerical rank deficiency is detected, the truncated ! singular values are set to zero. if ( nr < n ) call stdlib${ii}$_slaset( 'G', n-nr,1_${ik}$, zero,zero, s(nr+1), n ) ! .. undo scaling; this may cause overflow in the largest singular ! values. if ( ascaled )call stdlib${ii}$_slascl( 'G',0_${ik}$,0_${ik}$, one,sqrt(real(m,KIND=sp)), nr,1_${ik}$, s, n, ierr & ) if ( conda ) rwork(1_${ik}$) = sconda rwork(2_${ik}$) = p - nr ! .. p-nr is the number of singular values that are computed as ! exact zeros in stdlib${ii}$_cgesvd() applied to the (possibly truncated) ! full row rank triangular (trapezoidal) factor of a. numrank = nr return end subroutine stdlib${ii}$_cgesvdq module subroutine stdlib${ii}$_zgesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & !! ZCGESVDQ computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N unitary matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: joba, jobp, jobr, jobu, jobv integer(${ik}$), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork integer(${ik}$), intent(out) :: numrank, info integer(${ik}$), intent(inout) :: lcwork ! Array Arguments complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: u(ldu,*), v(ldv,*), cwork(*) real(dp), intent(out) :: s(*), rwork(*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ierr, nr, n1, optratio, p, q integer(${ik}$) :: lwcon, lwqp3, lwrk_zgelqf, lwrk_zgesvd, lwrk_zgesvd2, lwrk_zgeqp3, & lwrk_zgeqrf, lwrk_zunmlq, lwrk_zunmqr, lwrk_zunmqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & lwunq, lwunq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr real(dp) :: big, epsln, rtmp, sconda, sfmin complex(dp) :: ctmp ! Local Arrays complex(dp) :: cdummy(1_${ik}$) real(dp) :: rdummy(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) wntur = stdlib_lsame( jobu, 'R' ) wntua = stdlib_lsame( jobu, 'A' ) wntuf = stdlib_lsame( jobu, 'F' ) lsvc0 = wntus .or. wntur .or. wntua lsvec = lsvc0 .or. wntuf dntwu = stdlib_lsame( jobu, 'N' ) wntvr = stdlib_lsame( jobv, 'R' ) wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) rsvec = wntvr .or. wntva dntwv = stdlib_lsame( jobv, 'N' ) accla = stdlib_lsame( joba, 'A' ) acclm = stdlib_lsame( joba, 'M' ) conda = stdlib_lsame( joba, 'E' ) acclh = stdlib_lsame( joba, 'H' ) .or. conda rowprm = stdlib_lsame( jobp, 'P' ) rtrans = stdlib_lsame( jobr, 'T' ) if ( rowprm ) then iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ ) rminwrk = max( 2_${ik}$, m, 5_${ik}$*n ) else iminwrk = max( 1_${ik}$, n ) rminwrk = max( 2_${ik}$, 5_${ik}$*n ) end if lquery = (liwork == -1_${ik}$ .or. lcwork == -1_${ik}$ .or. lrwork == -1_${ik}$) info = 0_${ik}$ if ( .not. ( accla .or. acclm .or. acclh ) ) then info = -1_${ik}$ else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then info = -2_${ik}$ else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then info = -3_${ik}$ else if ( .not.( lsvec .or. dntwu ) ) then info = -4_${ik}$ else if ( wntur .and. wntva ) then info = -5_${ik}$ else if ( .not.( rsvec .or. dntwv )) then info = -5_${ik}$ else if ( m<0_${ik}$ ) then info = -6_${ik}$ else if ( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -7_${ik}$ else if ( lda<max( 1_${ik}$, m ) ) then info = -9_${ik}$ else if ( ldu<1_${ik}$ .or. ( lsvc0 .and. ldu<m ) .or.( wntuf .and. ldu<n ) ) then info = -12_${ik}$ else if ( ldv<1_${ik}$ .or. ( rsvec .and. ldv<n ) .or.( conda .and. ldv<n ) ) then info = -14_${ik}$ else if ( liwork < iminwrk .and. .not. lquery ) then info = -17_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lcwork are written with a lot of redundancy and ! can be simplified. however, this detailed form is easier for ! maintenance and modifications of the code.]] ! Minimal Workspace Length For Stdlib_Zgeqp3 Of An M X N Matrix lwqp3 = n+1 ! Minimal Workspace Length For Stdlib_Zunmqr To Build Left Singular Vectors if ( wntus .or. wntur ) then lwunq = max( n , 1_${ik}$ ) else if ( wntua ) then lwunq = max( m , 1_${ik}$ ) end if ! Minimal Workspace Length For Stdlib_Zpocon Of An N X N Matrix lwcon = 2_${ik}$ * n ! Stdlib_Zgesvd Of An N X N Matrix lwsvd = max( 3_${ik}$ * n, 1_${ik}$ ) if ( lquery ) then call stdlib${ii}$_zgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1_${ik}$,rdummy, ierr ) lwrk_zgeqp3 = int( cdummy(1_${ik}$),KIND=${ik}$) if ( wntus .or. wntur ) then call stdlib${ii}$_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_zunmqr = int( cdummy(1_${ik}$),KIND=${ik}$) else if ( wntua ) then call stdlib${ii}$_zunmqr( 'L', 'N', m, m, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_zunmqr = int( cdummy(1_${ik}$),KIND=${ik}$) else lwrk_zunmqr = 0_${ik}$ end if end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ if ( .not. (lsvec .or. rsvec ) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If ! only the singular values are requested if ( conda ) then minwrk = max( n+lwqp3, lwcon, lwsvd ) else minwrk = max( n+lwqp3, lwsvd ) end if if ( lquery ) then call stdlib${ii}$_zgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_zgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = max( n+lwrk_zgeqp3, n+lwcon, lwrk_zgesvd ) else optwrk = max( n+lwrk_zgeqp3, lwrk_zgesvd ) end if end if else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the left singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd, lwunq ) else minwrk = n + max( lwqp3, lwsvd, lwunq ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_zgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) else call stdlib${ii}$_zgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) end if lwrk_zgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_zgeqp3, lwcon, lwrk_zgesvd,lwrk_zunmqr ) else optwrk = n + max( lwrk_zgeqp3, lwrk_zgesvd,lwrk_zunmqr ) end if end if else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the right singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd ) else minwrk = n + max( lwqp3, lwsvd ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_zgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -& 1_${ik}$, rdummy, ierr ) else call stdlib${ii}$_zgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -& 1_${ik}$, rdummy, ierr ) end if lwrk_zgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_zgeqp3, lwcon, lwrk_zgesvd ) else optwrk = n + max( lwrk_zgeqp3, lwrk_zgesvd ) end if end if else ! Minimal And Optimal Sizes Of The Complex Workspace If The ! full svd is requested if ( rtrans ) then minwrk = max( lwqp3, lwsvd, lwunq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n x n/2 stdlib${ii}$_zgeqrf lwqrf = max( n/2_${ik}$, 1_${ik}$ ) ! .. minimal workspace length for n/2 x n/2 stdlib${ii}$_zgesvd lwsvd2 = max( 3_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunq2 = max( n, 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwqrf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunq2, lwunq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if else minwrk = max( lwqp3, lwsvd, lwunq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n/2 x n stdlib${ii}$_zgelqf lwlqf = max( n/2_${ik}$, 1_${ik}$ ) lwsvd2 = max( 3_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunlq = max( n , 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwlqf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunlq, lwunq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_zgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_zgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_zgeqp3,lwrk_zgesvd,lwrk_zunmqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_zgeqrf(n,n/2_${ik}$,u,ldu,cdummy,cdummy,-1_${ik}$,ierr) lwrk_zgeqrf = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zgesvd( 'S', 'O', n/2_${ik}$,n/2_${ik}$, v,ldv, s, u,ldu,v, ldv, cdummy,& -1_${ik}$, rdummy, ierr ) lwrk_zgesvd2 = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmqr( 'R', 'C', n, n, n/2_${ik}$, u, ldu, cdummy,v, ldv, & cdummy, -1_${ik}$, ierr ) lwrk_zunmqr2 = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_zgeqp3, n/2_${ik}$+lwrk_zgeqrf,n/2_${ik}$+lwrk_zgesvd2, n/2_${ik}$+& lwrk_zunmqr2 ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if else call stdlib${ii}$_zgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_zgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_zgeqp3,lwrk_zgesvd,lwrk_zunmqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_zgelqf(n/2_${ik}$,n,u,ldu,cdummy,cdummy,-1_${ik}$,ierr) lwrk_zgelqf = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zgesvd( 'S','O', n/2_${ik}$,n/2_${ik}$, v, ldv, s, u, ldu,v, ldv, cdummy,& -1_${ik}$, rdummy, ierr ) lwrk_zgesvd2 = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmlq( 'R', 'N', n, n, n/2_${ik}$, u, ldu, cdummy,v, ldv, cdummy,& -1_${ik}$,ierr ) lwrk_zunmlq = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_zgeqp3, n/2_${ik}$+lwrk_zgelqf,n/2_${ik}$+lwrk_zgesvd2, n/2_${ik}$+& lwrk_zunmlq ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if end if end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( 2_${ik}$, optwrk ) if ( lcwork < minwrk .and. (.not.lquery) ) info = -19_${ik}$ end if if (info == 0_${ik}$ .and. lrwork < rminwrk .and. .not. lquery) then info = -21_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVDQ', -info ) return else if ( lquery ) then ! return optimal workspace iwork(1_${ik}$) = iminwrk cwork(1_${ik}$) = optwrk cwork(2_${ik}$) = minwrk rwork(1_${ik}$) = rminwrk return end if ! quick return if the matrix is void. if( ( m==0_${ik}$ ) .or. ( n==0_${ik}$ ) ) then ! All Output Is Void return end if big = stdlib${ii}$_dlamch('O') ascaled = .false. if ( rowprm ) then ! Reordering The Rows In Decreasing Sequence In The ! ell-infinity norm - this enhances numerical robustness in ! the case of differently scaled rows. do p = 1, m ! rwork(p) = abs( a(p,stdlib${ii}$_izamax(n,a(p,1),lda)) ) ! [[stdlib${ii}$_zlange will return nan if an entry of the p-th row is nan]] rwork(p) = stdlib${ii}$_zlange( 'M', 1_${ik}$, n, a(p,1_${ik}$), lda, rdummy ) ! .. check for nan's and inf's if ( ( rwork(p) /= rwork(p) ) .or.( (rwork(p)*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVDQ', -info ) return end if end do do p = 1, m - 1 q = stdlib${ii}$_idamax( m-p+1, rwork(p), 1_${ik}$ ) + p - 1_${ik}$ iwork(n+p) = q if ( p /= q ) then rtmp = rwork(p) rwork(p) = rwork(q) rwork(q) = rtmp end if end do if ( rwork(1_${ik}$) == zero ) then ! quick return: a is the m x n zero matrix. numrank = 0_${ik}$ call stdlib${ii}$_dlaset( 'G', n, 1_${ik}$, zero, zero, s, n ) if ( wntus ) call stdlib${ii}$_zlaset('G', m, n, czero, cone, u, ldu) if ( wntua ) call stdlib${ii}$_zlaset('G', m, m, czero, cone, u, ldu) if ( wntva ) call stdlib${ii}$_zlaset('G', n, n, czero, cone, v, ldv) if ( wntuf ) then call stdlib${ii}$_zlaset( 'G', n, 1_${ik}$, czero, czero, cwork, n ) call stdlib${ii}$_zlaset( 'G', m, n, czero, cone, u, ldu ) end if do p = 1, n iwork(p) = p end do if ( rowprm ) then do p = n + 1, n + m - 1 iwork(p) = p - n end do end if if ( conda ) rwork(1_${ik}$) = -1_${ik}$ rwork(2_${ik}$) = -1_${ik}$ return end if if ( rwork(1_${ik}$) > big / sqrt(real(m,KIND=dp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_zlascl('G',0_${ik}$,0_${ik}$,sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) ascaled = .true. end if call stdlib${ii}$_zlaswp( n, a, lda, 1_${ik}$, m-1, iwork(n+1), 1_${ik}$ ) end if ! .. at this stage, preemptive scaling is done only to avoid column ! norms overflows during the qr factorization. the svd procedure should ! have its own scaling to save the singular values from overflows and ! underflows. that depends on the svd procedure. if ( .not.rowprm ) then rtmp = stdlib${ii}$_zlange( 'M', m, n, a, lda, rwork ) if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVDQ', -info ) return end if if ( rtmp > big / sqrt(real(m,KIND=dp)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_zlascl('G',0_${ik}$,0_${ik}$, sqrt(real(m,KIND=dp)),one, m,n, a,lda, ierr) ascaled = .true. end if end if ! Qr Factorization With Column Pivoting ! a * p = q * [ r ] ! [ 0 ] do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,rwork, ierr ) ! if the user requested accuracy level allows truncation in the ! computed upper triangular factor, the matrix r is examined and, ! if possible, replaced with its leading upper trapezoidal part. epsln = stdlib${ii}$_dlamch('E') sfmin = stdlib${ii}$_dlamch('S') ! small = sfmin / epsln nr = n if ( accla ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*eps*||a||_f are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*eps*||a||_f. nr = 1_${ik}$ rtmp = sqrt(real(n,KIND=dp))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) exit loop_3002 nr = nr + 1_${ik}$ end do loop_3002 elseif ( acclm ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r is used as the criterion for being ! close-to-rank-deficient. the threshold is set to epsln=stdlib${ii}$_dlamch('e'). ! [[this can be made more flexible by replacing this hard-coded value ! with a user specified threshold.]] also, the values that underflow ! will be truncated. nr = 1_${ik}$ loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! Rrqr Not Authorized To Determine Numerical Rank Except In The ! obvious case of zero pivots. ! .. inspect r for exact zeros on the diagonal; ! r(i,i)=0 => r(i:n,i:n)=0. nr = 1_${ik}$ loop_3502: do p = 2, n if ( abs(a(p,p)) == zero ) exit loop_3502 nr = nr + 1_${ik}$ end do loop_3502 if ( conda ) then ! estimate the scaled condition number of a. use the fact that it is ! the same as the scaled condition number of r. ! V Is Used As Workspace call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, v, ldv ) ! only the leading nr x nr submatrix of the triangular factor ! is considered. only if nr=n will this give a reliable error ! bound. however, even for nr < n, this can be used on an ! expert level and obtain useful information in the sense of ! perturbation theory. do p = 1, nr rtmp = stdlib${ii}$_dznrm2( p, v(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_zdscal( p, one/rtmp, v(1_${ik}$,p), 1_${ik}$ ) end do if ( .not. ( lsvec .or. rsvec ) ) then call stdlib${ii}$_zpocon( 'U', nr, v, ldv, one, rtmp,cwork, rwork, ierr ) else call stdlib${ii}$_zpocon( 'U', nr, v, ldv, one, rtmp,cwork(n+1), rwork, ierr ) end if sconda = one / sqrt(rtmp) ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda ! see the reference [1] for more details. end if endif if ( wntur ) then n1 = nr else if ( wntus .or. wntuf) then n1 = n else if ( wntua ) then n1 = m end if if ( .not. ( rsvec .or. lsvec ) ) then ! ....................................................................... ! Only The Singular Values Are Requested ! ....................................................................... if ( rtrans ) then ! .. compute the singular values of r**h = [a](1:nr,1:n)**h ! .. set the lower triangle of [a] to [a](1:nr,1:n)**h and ! the upper triangle of [a] to zero. do p = 1, min( n, nr ) a(p,p) = conjg(a(p,p)) do q = p + 1, n a(q,p) = conjg(a(p,q)) if ( q <= nr ) a(p,q) = czero end do end do call stdlib${ii}$_zgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & rwork, info ) else ! .. compute the singular values of r = [a](1:nr,1:n) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'L', nr-1,nr-1, czero,czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_zgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & rwork, info ) end if else if ( lsvec .and. ( .not. rsvec) ) then ! ....................................................................... ! The Singular Values And The Left Singular Vectors Requested ! ......................................................................."""""""" if ( rtrans ) then ! .. apply stdlib${ii}$_zgesvd to r**h ! .. copy r**h into [u] and overwrite [u] with the right singular ! vectors of r do p = 1, nr do q = p, n u(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero,czero, u(1_${ik}$,2_${ik}$), ldu ) ! .. the left singular vectors not computed, the nr right singular ! vectors overwrite [u](1:nr,1:nr) as conjugate transposed. these ! will be pre-multiplied by q to build the left singular vectors of a. call stdlib${ii}$_zgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, nr u(p,p) = conjg(u(p,p)) do q = p + 1, nr ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do else ! Apply Stdlib_Zgesvd To R ! .. copy r into [u] and overwrite [u] with the left singular vectors call stdlib${ii}$_zlacpy( 'U', nr, n, a, lda, u, ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'L', nr-1, nr-1, czero, czero, u(2_${ik}$,1_${ik}$), ldu ) ! .. the right singular vectors not computed, the nr left singular ! vectors overwrite [u](1:nr,1:nr) call stdlib${ii}$_zgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of ! r. these will be pre-multiplied by q to build the left singular ! vectors of a. end if ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. ( .not.wntuf ) ) then call stdlib${ii}$_zlaset('A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_zlaset( 'A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not.wntuf )call stdlib${ii}$_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & cwork(n+1), lcwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_zlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) else if ( rsvec .and. ( .not. lsvec ) ) then ! ....................................................................... ! The Singular Values And The Right Singular Vectors Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_zgesvd to r**h ! .. copy r**h into v and overwrite v with the left singular vectors do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**h overwrite v, the right singular ! vectors not computed if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_zgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, nr v(p,p) = conjg(v(p,p)) do q = p + 1, nr ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do if ( nr < n ) then do p = 1, nr do q = nr + 1, n v(p,q) = conjg(v(q,p)) end do end do end if call stdlib${ii}$_zlapmt( .false., nr, n, v, ldv, iwork ) else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:n,1:nr) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the qr factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_zlaset('G', n, n-nr, czero, czero, v(1_${ik}$,nr+1), ldv) call stdlib${ii}$_zgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, n v(p,p) = conjg(v(p,p)) do q = p + 1, n ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do call stdlib${ii}$_zlapmt( .false., n, n, v, ldv, iwork ) end if else ! Aply Stdlib_Zgesvd To R ! Copy R Into V And Overwrite V With The Right Singular Vectors call stdlib${ii}$_zlacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'L', nr-1, nr-1, czero, czero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors overwrite v, the nr left singular ! vectors stored in u(1:nr,1:nr) if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_zgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_zlapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:nr,1:n) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the lq factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_zlaset('G', n-nr, n, czero,czero, v(nr+1,1_${ik}$), ldv) call stdlib${ii}$_zgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_zlapmt( .false., n, n, v, ldv, iwork ) end if ! .. now [v] contains the adjoint of the matrix of the right singular ! vectors of a. end if else ! ....................................................................... ! Full Svd Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_zgesvd to r**h [[this option is left for r if ( wntvr .or. ( nr == n ) ) then ! .. copy r**h into [v] and overwrite [v] with the left singular ! vectors of r**h do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**h overwrite [v], the nr right ! singular vectors of r**h stored in [u](1:nr,1:nr) as conjugate ! transposed call stdlib${ii}$_zgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) ! Assemble V do p = 1, nr v(p,p) = conjg(v(p,p)) do q = p + 1, nr ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do if ( nr < n ) then do p = 1, nr do q = nr+1, n v(p,q) = conjg(v(q,p)) end do end do end if call stdlib${ii}$_zlapmt( .false., nr, n, v, ldv, iwork ) do p = 1, nr u(p,p) = conjg(u(p,p)) do q = p + 1, nr ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_zlaset('A', m-nr,nr, czero,czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_zlaset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! .. copy r**h into [v] and overwrite [v] with the left singular ! vectors of r**h ! [[the optimal ratio n/nr for using qrf instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio*nr > n ) then do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset('U',nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_zlaset('A',n,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_zgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, n v(p,p) = conjg(v(p,p)) do q = p + 1, n ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do call stdlib${ii}$_zlapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). do p = 1, n u(p,p) = conjg(u(p,p)) do q = p + 1, n ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_zlaset('A',m-n,n,czero,czero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_zlaset('A',n,n1-n,czero,czero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_zlaset('A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) end if end if else ! .. copy r**h into [u] and overwrite [u] with the right ! singular vectors of r do p = 1, nr do q = p, n u(q,nr+p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset('U',nr-1,nr-1,czero,czero,u(1_${ik}$,nr+2),ldu) call stdlib${ii}$_zgeqrf( n, nr, u(1_${ik}$,nr+1), ldu, cwork(n+1),cwork(n+nr+1), & lcwork-n-nr, ierr ) do p = 1, nr do q = 1, n v(q,p) = conjg(u(p,nr+q)) end do end do if (nr>1_${ik}$) call stdlib${ii}$_zlaset('U',nr-1,nr-1,czero,czero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_zgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, cwork(n+nr+& 1_${ik}$),lcwork-n-nr,rwork, info ) call stdlib${ii}$_zlaset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_zlaset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) call stdlib${ii}$_zunmqr('R','C', n, n, nr, u(1_${ik}$,nr+1), ldu,cwork(n+1),v,ldv,& cwork(n+nr+1),lcwork-n-nr,ierr) call stdlib${ii}$_zlapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_zlaset('A',m-nr,nr,czero,czero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_zlaset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) end if end if end if end if else ! .. apply stdlib${ii}$_zgesvd to r [[this is the recommended option]] if ( wntvr .or. ( nr == n ) ) then ! .. copy r into [v] and overwrite v with the right singular vectors call stdlib${ii}$_zlacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset( 'L', nr-1,nr-1, czero,czero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_zgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_zlapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_zlaset('A', m-nr,nr, czero,czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_zlaset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! The Requested Number Of The Left Singular Vectors ! is then n1 (n or m) ! [[the optimal ratio n/nr for using lq instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio * nr > n ) then call stdlib${ii}$_zlacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset('L', nr-1,nr-1, czero,czero, v(2_${ik}$,1_${ik}$),ldv) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_zlaset('A', n-nr,n, czero,czero, v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_zgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_zlapmt( .false., n, n, v, ldv, iwork ) ! .. now [v] contains the adjoint of the matrix of the right ! singular vectors of a. the leading n left singular vectors ! are in [u](1:n,1:n) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_zlaset('A',m-n,n,czero,czero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_zlaset('A',n,n1-n,czero,czero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_zlaset( 'A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) end if end if else call stdlib${ii}$_zlacpy( 'U', nr, n, a, lda, u(nr+1,1_${ik}$), ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset('L',nr-1,nr-1,czero,czero,u(nr+2,1_${ik}$),ldu) call stdlib${ii}$_zgelqf( nr, n, u(nr+1,1_${ik}$), ldu, cwork(n+1),cwork(n+nr+1), & lcwork-n-nr, ierr ) call stdlib${ii}$_zlacpy('L',nr,nr,u(nr+1,1_${ik}$),ldu,v,ldv) if ( nr > 1_${ik}$ )call stdlib${ii}$_zlaset('U',nr-1,nr-1,czero,czero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_zgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, cwork(n+nr+& 1_${ik}$), lcwork-n-nr, rwork, info ) call stdlib${ii}$_zlaset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_zlaset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) call stdlib${ii}$_zunmlq('R','N',n,n,nr,u(nr+1,1_${ik}$),ldu,cwork(n+1),v, ldv, cwork(n+& nr+1),lcwork-n-nr,ierr) call stdlib${ii}$_zlapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_zlaset('A',m-nr,nr,czero,czero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_zlaset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if end if end if ! .. end of the "r**h or r" branch end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not. wntuf )call stdlib${ii}$_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & cwork(n+1), lcwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_zlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) ! ... end of the "full svd" branch end if ! check whether some singular values are returned as zeros, e.g. ! due to underflow, and update the numerical rank. p = nr do q = p, 1, -1 if ( s(q) > zero ) go to 4002 nr = nr - 1_${ik}$ end do 4002 continue ! .. if numerical rank deficiency is detected, the truncated ! singular values are set to zero. if ( nr < n ) call stdlib${ii}$_dlaset( 'G', n-nr,1_${ik}$, zero,zero, s(nr+1), n ) ! .. undo scaling; this may cause overflow in the largest singular ! values. if ( ascaled )call stdlib${ii}$_dlascl( 'G',0_${ik}$,0_${ik}$, one,sqrt(real(m,KIND=dp)), nr,1_${ik}$, s, n, ierr & ) if ( conda ) rwork(1_${ik}$) = sconda rwork(2_${ik}$) = p - nr ! .. p-nr is the number of singular values that are computed as ! exact zeros in stdlib${ii}$_zgesvd() applied to the (possibly truncated) ! full row rank triangular (trapezoidal) factor of a. numrank = nr return end subroutine stdlib${ii}$_zgesvdq #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gesvdq( joba, jobp, jobr, jobu, jobv, m, n, a, lda,s, u, ldu, v, ldv, & !! ZCGESVDQ computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N unitary matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. numrank, iwork, liwork,cwork, lcwork, rwork, lrwork, info ) use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: joba, jobp, jobr, jobu, jobv integer(${ik}$), intent(in) :: m, n, lda, ldu, ldv, liwork, lrwork integer(${ik}$), intent(out) :: numrank, info integer(${ik}$), intent(inout) :: lcwork ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: u(ldu,*), v(ldv,*), cwork(*) real(${ck}$), intent(out) :: s(*), rwork(*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ierr, nr, n1, optratio, p, q integer(${ik}$) :: lwcon, lwqp3, lwrk_wgelqf, lwrk_wgesvd, lwrk_wgesvd2, lwrk_wgeqp3, & lwrk_wgeqrf, lwrk_wunmlq, lwrk_wunmqr, lwrk_wunmqr2, lwlqf, lwqrf, lwsvd, lwsvd2, & lwunq, lwunq2, lwunlq, minwrk, minwrk2, optwrk, optwrk2, iminwrk, rminwrk logical(lk) :: accla, acclm, acclh, ascaled, conda, dntwu, dntwv, lquery, lsvc0, lsvec,& rowprm, rsvec, rtrans, wntua, wntuf, wntur, wntus, wntva, wntvr real(${ck}$) :: big, epsln, rtmp, sconda, sfmin complex(${ck}$) :: ctmp ! Local Arrays complex(${ck}$) :: cdummy(1_${ik}$) real(${ck}$) :: rdummy(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments wntus = stdlib_lsame( jobu, 'S' ) .or. stdlib_lsame( jobu, 'U' ) wntur = stdlib_lsame( jobu, 'R' ) wntua = stdlib_lsame( jobu, 'A' ) wntuf = stdlib_lsame( jobu, 'F' ) lsvc0 = wntus .or. wntur .or. wntua lsvec = lsvc0 .or. wntuf dntwu = stdlib_lsame( jobu, 'N' ) wntvr = stdlib_lsame( jobv, 'R' ) wntva = stdlib_lsame( jobv, 'A' ) .or. stdlib_lsame( jobv, 'V' ) rsvec = wntvr .or. wntva dntwv = stdlib_lsame( jobv, 'N' ) accla = stdlib_lsame( joba, 'A' ) acclm = stdlib_lsame( joba, 'M' ) conda = stdlib_lsame( joba, 'E' ) acclh = stdlib_lsame( joba, 'H' ) .or. conda rowprm = stdlib_lsame( jobp, 'P' ) rtrans = stdlib_lsame( jobr, 'T' ) if ( rowprm ) then iminwrk = max( 1_${ik}$, n + m - 1_${ik}$ ) rminwrk = max( 2_${ik}$, m, 5_${ik}$*n ) else iminwrk = max( 1_${ik}$, n ) rminwrk = max( 2_${ik}$, 5_${ik}$*n ) end if lquery = (liwork == -1_${ik}$ .or. lcwork == -1_${ik}$ .or. lrwork == -1_${ik}$) info = 0_${ik}$ if ( .not. ( accla .or. acclm .or. acclh ) ) then info = -1_${ik}$ else if ( .not.( rowprm .or. stdlib_lsame( jobp, 'N' ) ) ) then info = -2_${ik}$ else if ( .not.( rtrans .or. stdlib_lsame( jobr, 'N' ) ) ) then info = -3_${ik}$ else if ( .not.( lsvec .or. dntwu ) ) then info = -4_${ik}$ else if ( wntur .and. wntva ) then info = -5_${ik}$ else if ( .not.( rsvec .or. dntwv )) then info = -5_${ik}$ else if ( m<0_${ik}$ ) then info = -6_${ik}$ else if ( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -7_${ik}$ else if ( lda<max( 1_${ik}$, m ) ) then info = -9_${ik}$ else if ( ldu<1_${ik}$ .or. ( lsvc0 .and. ldu<m ) .or.( wntuf .and. ldu<n ) ) then info = -12_${ik}$ else if ( ldv<1_${ik}$ .or. ( rsvec .and. ldv<n ) .or.( conda .and. ldv<n ) ) then info = -14_${ik}$ else if ( liwork < iminwrk .and. .not. lquery ) then info = -17_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lcwork are written with a lot of redundancy and ! can be simplified. however, this detailed form is easier for ! maintenance and modifications of the code.]] ! Minimal Workspace Length For Stdlib_Zgeqp3 Of An M X N Matrix lwqp3 = n+1 ! Minimal Workspace Length For Stdlib_Zunmqr To Build Left Singular Vectors if ( wntus .or. wntur ) then lwunq = max( n , 1_${ik}$ ) else if ( wntua ) then lwunq = max( m , 1_${ik}$ ) end if ! Minimal Workspace Length For Stdlib_Zpocon Of An N X N Matrix lwcon = 2_${ik}$ * n ! Stdlib_Zgesvd Of An N X N Matrix lwsvd = max( 3_${ik}$ * n, 1_${ik}$ ) if ( lquery ) then call stdlib${ii}$_${ci}$geqp3( m, n, a, lda, iwork, cdummy, cdummy, -1_${ik}$,rdummy, ierr ) lwrk_wgeqp3 = int( cdummy(1_${ik}$),KIND=${ik}$) if ( wntus .or. wntur ) then call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_wunmqr = int( cdummy(1_${ik}$),KIND=${ik}$) else if ( wntua ) then call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, m, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_wunmqr = int( cdummy(1_${ik}$),KIND=${ik}$) else lwrk_wunmqr = 0_${ik}$ end if end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ if ( .not. (lsvec .or. rsvec ) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If ! only the singular values are requested if ( conda ) then minwrk = max( n+lwqp3, lwcon, lwsvd ) else minwrk = max( n+lwqp3, lwsvd ) end if if ( lquery ) then call stdlib${ii}$_${ci}$gesvd( 'N', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_wgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = max( n+lwrk_wgeqp3, n+lwcon, lwrk_wgesvd ) else optwrk = max( n+lwrk_wgeqp3, lwrk_wgesvd ) end if end if else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the left singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd, lwunq ) else minwrk = n + max( lwqp3, lwsvd, lwunq ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_${ci}$gesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) else call stdlib${ii}$_${ci}$gesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) end if lwrk_wgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_wgeqp3, lwcon, lwrk_wgesvd,lwrk_wunmqr ) else optwrk = n + max( lwrk_wgeqp3, lwrk_wgesvd,lwrk_wunmqr ) end if end if else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the right singular vectors are requested if ( conda ) then minwrk = n + max( lwqp3, lwcon, lwsvd ) else minwrk = n + max( lwqp3, lwsvd ) end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_${ci}$gesvd( 'O', 'N', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -& 1_${ik}$, rdummy, ierr ) else call stdlib${ii}$_${ci}$gesvd( 'N', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -& 1_${ik}$, rdummy, ierr ) end if lwrk_wgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) if ( conda ) then optwrk = n + max( lwrk_wgeqp3, lwcon, lwrk_wgesvd ) else optwrk = n + max( lwrk_wgeqp3, lwrk_wgesvd ) end if end if else ! Minimal And Optimal Sizes Of The Complex Workspace If The ! full svd is requested if ( rtrans ) then minwrk = max( lwqp3, lwsvd, lwunq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n x n/2 stdlib${ii}$_${ci}$geqrf lwqrf = max( n/2_${ik}$, 1_${ik}$ ) ! .. minimal workspace length for n/2 x n/2 stdlib${ii}$_${ci}$gesvd lwsvd2 = max( 3_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunq2 = max( n, 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwqrf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunq2, lwunq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if else minwrk = max( lwqp3, lwsvd, lwunq ) if ( conda ) minwrk = max( minwrk, lwcon ) minwrk = minwrk + n if ( wntva ) then ! .. minimal workspace length for n/2 x n stdlib${ii}$_${ci}$gelqf lwlqf = max( n/2_${ik}$, 1_${ik}$ ) lwsvd2 = max( 3_${ik}$ * (n/2_${ik}$), 1_${ik}$ ) lwunlq = max( n , 1_${ik}$ ) minwrk2 = max( lwqp3, n/2_${ik}$+lwlqf, n/2_${ik}$+lwsvd2,n/2_${ik}$+lwunlq, lwunq ) if ( conda ) minwrk2 = max( minwrk2, lwcon ) minwrk2 = n + minwrk2 minwrk = max( minwrk, minwrk2 ) end if end if if ( lquery ) then if ( rtrans ) then call stdlib${ii}$_${ci}$gesvd( 'O', 'A', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_wgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_wgeqp3,lwrk_wgesvd,lwrk_wunmqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_${ci}$geqrf(n,n/2_${ik}$,u,ldu,cdummy,cdummy,-1_${ik}$,ierr) lwrk_wgeqrf = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$gesvd( 'S', 'O', n/2_${ik}$,n/2_${ik}$, v,ldv, s, u,ldu,v, ldv, cdummy,& -1_${ik}$, rdummy, ierr ) lwrk_wgesvd2 = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmqr( 'R', 'C', n, n, n/2_${ik}$, u, ldu, cdummy,v, ldv, & cdummy, -1_${ik}$, ierr ) lwrk_wunmqr2 = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_wgeqp3, n/2_${ik}$+lwrk_wgeqrf,n/2_${ik}$+lwrk_wgesvd2, n/2_${ik}$+& lwrk_wunmqr2 ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if else call stdlib${ii}$_${ci}$gesvd( 'S', 'O', n, n, a, lda, s, u, ldu,v, ldv, cdummy, -1_${ik}$, & rdummy, ierr ) lwrk_wgesvd = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk = max(lwrk_wgeqp3,lwrk_wgesvd,lwrk_wunmqr) if ( conda ) optwrk = max( optwrk, lwcon ) optwrk = n + optwrk if ( wntva ) then call stdlib${ii}$_${ci}$gelqf(n/2_${ik}$,n,u,ldu,cdummy,cdummy,-1_${ik}$,ierr) lwrk_wgelqf = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$gesvd( 'S','O', n/2_${ik}$,n/2_${ik}$, v, ldv, s, u, ldu,v, ldv, cdummy,& -1_${ik}$, rdummy, ierr ) lwrk_wgesvd2 = int( cdummy(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmlq( 'R', 'N', n, n, n/2_${ik}$, u, ldu, cdummy,v, ldv, cdummy,& -1_${ik}$,ierr ) lwrk_wunmlq = int( cdummy(1_${ik}$),KIND=${ik}$) optwrk2 = max( lwrk_wgeqp3, n/2_${ik}$+lwrk_wgelqf,n/2_${ik}$+lwrk_wgesvd2, n/2_${ik}$+& lwrk_wunmlq ) if ( conda ) optwrk2 = max( optwrk2, lwcon ) optwrk2 = n + optwrk2 optwrk = max( optwrk, optwrk2 ) end if end if end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( 2_${ik}$, optwrk ) if ( lcwork < minwrk .and. (.not.lquery) ) info = -19_${ik}$ end if if (info == 0_${ik}$ .and. lrwork < rminwrk .and. .not. lquery) then info = -21_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVDQ', -info ) return else if ( lquery ) then ! return optimal workspace iwork(1_${ik}$) = iminwrk cwork(1_${ik}$) = optwrk cwork(2_${ik}$) = minwrk rwork(1_${ik}$) = rminwrk return end if ! quick return if the matrix is void. if( ( m==0_${ik}$ ) .or. ( n==0_${ik}$ ) ) then ! All Output Is Void return end if big = stdlib${ii}$_${c2ri(ci)}$lamch('O') ascaled = .false. if ( rowprm ) then ! Reordering The Rows In Decreasing Sequence In The ! ell-infinity norm - this enhances numerical robustness in ! the case of differently scaled rows. do p = 1, m ! rwork(p) = abs( a(p,stdlib${ii}$_i${ci}$amax(n,a(p,1),lda)) ) ! [[stdlib${ii}$_${ci}$lange will return nan if an entry of the p-th row is nan]] rwork(p) = stdlib${ii}$_${ci}$lange( 'M', 1_${ik}$, n, a(p,1_${ik}$), lda, rdummy ) ! .. check for nan's and inf's if ( ( rwork(p) /= rwork(p) ) .or.( (rwork(p)*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVDQ', -info ) return end if end do do p = 1, m - 1 q = stdlib${ii}$_i${c2ri(ci)}$amax( m-p+1, rwork(p), 1_${ik}$ ) + p - 1_${ik}$ iwork(n+p) = q if ( p /= q ) then rtmp = rwork(p) rwork(p) = rwork(q) rwork(q) = rtmp end if end do if ( rwork(1_${ik}$) == zero ) then ! quick return: a is the m x n zero matrix. numrank = 0_${ik}$ call stdlib${ii}$_${c2ri(ci)}$laset( 'G', n, 1_${ik}$, zero, zero, s, n ) if ( wntus ) call stdlib${ii}$_${ci}$laset('G', m, n, czero, cone, u, ldu) if ( wntua ) call stdlib${ii}$_${ci}$laset('G', m, m, czero, cone, u, ldu) if ( wntva ) call stdlib${ii}$_${ci}$laset('G', n, n, czero, cone, v, ldv) if ( wntuf ) then call stdlib${ii}$_${ci}$laset( 'G', n, 1_${ik}$, czero, czero, cwork, n ) call stdlib${ii}$_${ci}$laset( 'G', m, n, czero, cone, u, ldu ) end if do p = 1, n iwork(p) = p end do if ( rowprm ) then do p = n + 1, n + m - 1 iwork(p) = p - n end do end if if ( conda ) rwork(1_${ik}$) = -1_${ik}$ rwork(2_${ik}$) = -1_${ik}$ return end if if ( rwork(1_${ik}$) > big / sqrt(real(m,KIND=${ck}$)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_${ci}$lascl('G',0_${ik}$,0_${ik}$,sqrt(real(m,KIND=${ck}$)),one, m,n, a,lda, ierr) ascaled = .true. end if call stdlib${ii}$_${ci}$laswp( n, a, lda, 1_${ik}$, m-1, iwork(n+1), 1_${ik}$ ) end if ! .. at this stage, preemptive scaling is done only to avoid column ! norms overflows during the qr factorization. the svd procedure should ! have its own scaling to save the singular values from overflows and ! underflows. that depends on the svd procedure. if ( .not.rowprm ) then rtmp = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, rwork ) if ( ( rtmp /= rtmp ) .or.( (rtmp*zero) /= zero ) ) then info = -8_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVDQ', -info ) return end if if ( rtmp > big / sqrt(real(m,KIND=${ck}$)) ) then ! .. to prevent overflow in the qr factorization, scale the ! matrix by 1/sqrt(m) if too large entry detected call stdlib${ii}$_${ci}$lascl('G',0_${ik}$,0_${ik}$, sqrt(real(m,KIND=${ck}$)),one, m,n, a,lda, ierr) ascaled = .true. end if end if ! Qr Factorization With Column Pivoting ! a * p = q * [ r ] ! [ 0 ] do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_${ci}$geqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,rwork, ierr ) ! if the user requested accuracy level allows truncation in the ! computed upper triangular factor, the matrix r is examined and, ! if possible, replaced with its leading upper trapezoidal part. epsln = stdlib${ii}$_${c2ri(ci)}$lamch('E') sfmin = stdlib${ii}$_${c2ri(ci)}$lamch('S') ! small = sfmin / epsln nr = n if ( accla ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*eps*||a||_f are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*eps*||a||_f. nr = 1_${ik}$ rtmp = sqrt(real(n,KIND=${ck}$))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) < (rtmp*abs(a(1,1))) ) exit loop_3002 nr = nr + 1_${ik}$ end do loop_3002 elseif ( acclm ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r is used as the criterion for being ! close-to-rank-deficient. the threshold is set to epsln=stdlib${ii}$_${c2ri(ci)}$lamch('e'). ! [[this can be made more flexible by replacing this hard-coded value ! with a user specified threshold.]] also, the values that underflow ! will be truncated. nr = 1_${ik}$ loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < sfmin ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! Rrqr Not Authorized To Determine Numerical Rank Except In The ! obvious case of zero pivots. ! .. inspect r for exact zeros on the diagonal; ! r(i,i)=0 => r(i:n,i:n)=0. nr = 1_${ik}$ loop_3502: do p = 2, n if ( abs(a(p,p)) == zero ) exit loop_3502 nr = nr + 1_${ik}$ end do loop_3502 if ( conda ) then ! estimate the scaled condition number of a. use the fact that it is ! the same as the scaled condition number of r. ! V Is Used As Workspace call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, v, ldv ) ! only the leading nr x nr submatrix of the triangular factor ! is considered. only if nr=n will this give a reliable error ! bound. however, even for nr < n, this can be used on an ! expert level and obtain useful information in the sense of ! perturbation theory. do p = 1, nr rtmp = stdlib${ii}$_${c2ri(ci)}$znrm2( p, v(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$dscal( p, one/rtmp, v(1_${ik}$,p), 1_${ik}$ ) end do if ( .not. ( lsvec .or. rsvec ) ) then call stdlib${ii}$_${ci}$pocon( 'U', nr, v, ldv, one, rtmp,cwork, rwork, ierr ) else call stdlib${ii}$_${ci}$pocon( 'U', nr, v, ldv, one, rtmp,cwork(n+1), rwork, ierr ) end if sconda = one / sqrt(rtmp) ! for nr=n, sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1), ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda ! see the reference [1] for more details. end if endif if ( wntur ) then n1 = nr else if ( wntus .or. wntuf) then n1 = n else if ( wntua ) then n1 = m end if if ( .not. ( rsvec .or. lsvec ) ) then ! ....................................................................... ! Only The Singular Values Are Requested ! ....................................................................... if ( rtrans ) then ! .. compute the singular values of r**h = [a](1:nr,1:n)**h ! .. set the lower triangle of [a] to [a](1:nr,1:n)**h and ! the upper triangle of [a] to zero. do p = 1, min( n, nr ) a(p,p) = conjg(a(p,p)) do q = p + 1, n a(q,p) = conjg(a(p,q)) if ( q <= nr ) a(p,q) = czero end do end do call stdlib${ii}$_${ci}$gesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & rwork, info ) else ! .. compute the singular values of r = [a](1:nr,1:n) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', nr-1,nr-1, czero,czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_${ci}$gesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,v, ldv, cwork, lcwork, & rwork, info ) end if else if ( lsvec .and. ( .not. rsvec) ) then ! ....................................................................... ! The Singular Values And The Left Singular Vectors Requested ! ......................................................................."""""""" if ( rtrans ) then ! .. apply stdlib${ii}$_${ci}$gesvd to r**h ! .. copy r**h into [u] and overwrite [u] with the right singular ! vectors of r do p = 1, nr do q = p, n u(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero,czero, u(1_${ik}$,2_${ik}$), ldu ) ! .. the left singular vectors not computed, the nr right singular ! vectors overwrite [u](1:nr,1:nr) as conjugate transposed. these ! will be pre-multiplied by q to build the left singular vectors of a. call stdlib${ii}$_${ci}$gesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, nr u(p,p) = conjg(u(p,p)) do q = p + 1, nr ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do else ! Apply Stdlib_Zgesvd To R ! .. copy r into [u] and overwrite [u] with the left singular vectors call stdlib${ii}$_${ci}$lacpy( 'U', nr, n, a, lda, u, ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', nr-1, nr-1, czero, czero, u(2_${ik}$,1_${ik}$), ldu ) ! .. the right singular vectors not computed, the nr left singular ! vectors overwrite [u](1:nr,1:nr) call stdlib${ii}$_${ci}$gesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) ! .. now [u](1:nr,1:nr) contains the nr left singular vectors of ! r. these will be pre-multiplied by q to build the left singular ! vectors of a. end if ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. ( .not.wntuf ) ) then call stdlib${ii}$_${ci}$laset('A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset( 'A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_${ci}$laset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not.wntuf )call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & cwork(n+1), lcwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_${ci}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) else if ( rsvec .and. ( .not. lsvec ) ) then ! ....................................................................... ! The Singular Values And The Right Singular Vectors Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_${ci}$gesvd to r**h ! .. copy r**h into v and overwrite v with the left singular vectors do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**h overwrite v, the right singular ! vectors not computed if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_${ci}$gesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, nr v(p,p) = conjg(v(p,p)) do q = p + 1, nr ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do if ( nr < n ) then do p = 1, nr do q = nr + 1, n v(p,q) = conjg(v(q,p)) end do end do end if call stdlib${ii}$_${ci}$lapmt( .false., nr, n, v, ldv, iwork ) else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:n,1:nr) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the qr factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_${ci}$laset('G', n, n-nr, czero, czero, v(1_${ik}$,nr+1), ldv) call stdlib${ii}$_${ci}$gesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, n v(p,p) = conjg(v(p,p)) do q = p + 1, n ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do call stdlib${ii}$_${ci}$lapmt( .false., n, n, v, ldv, iwork ) end if else ! Aply Stdlib_Zgesvd To R ! Copy R Into V And Overwrite V With The Right Singular Vectors call stdlib${ii}$_${ci}$lacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', nr-1, nr-1, czero, czero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors overwrite v, the nr left singular ! vectors stored in u(1:nr,1:nr) if ( wntvr .or. ( nr == n ) ) then call stdlib${ii}$_${ci}$gesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_${ci}$lapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h else ! .. need all n right singular vectors and nr < n ! [!] this is simple implementation that augments [v](1:nr,1:n) ! by padding a zero block. in the case nr << n, a more efficient ! way is to first use the lq factorization. for more details ! how to implement this, see the " full svd " branch. call stdlib${ii}$_${ci}$laset('G', n-nr, n, czero,czero, v(nr+1,1_${ik}$), ldv) call stdlib${ii}$_${ci}$gesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_${ci}$lapmt( .false., n, n, v, ldv, iwork ) end if ! .. now [v] contains the adjoint of the matrix of the right singular ! vectors of a. end if else ! ....................................................................... ! Full Svd Requested ! ....................................................................... if ( rtrans ) then ! .. apply stdlib${ii}$_${ci}$gesvd to r**h [[this option is left for r if ( wntvr .or. ( nr == n ) ) then ! .. copy r**h into [v] and overwrite [v] with the left singular ! vectors of r**h do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) ! .. the left singular vectors of r**h overwrite [v], the nr right ! singular vectors of r**h stored in [u](1:nr,1:nr) as conjugate ! transposed call stdlib${ii}$_${ci}$gesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) ! Assemble V do p = 1, nr v(p,p) = conjg(v(p,p)) do q = p + 1, nr ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do if ( nr < n ) then do p = 1, nr do q = nr+1, n v(p,q) = conjg(v(q,p)) end do end do end if call stdlib${ii}$_${ci}$lapmt( .false., nr, n, v, ldv, iwork ) do p = 1, nr u(p,p) = conjg(u(p,p)) do q = p + 1, nr ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ci}$laset('A', m-nr,nr, czero,czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ci}$laset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! .. copy r**h into [v] and overwrite [v] with the left singular ! vectors of r**h ! [[the optimal ratio n/nr for using qrf instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio*nr > n ) then do p = 1, nr do q = p, n v(q,p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset('U',nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_${ci}$laset('A',n,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ci}$gesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,u, ldu, cwork(n+1), & lcwork-n, rwork, info ) do p = 1, n v(p,p) = conjg(v(p,p)) do q = p + 1, n ctmp = conjg(v(q,p)) v(q,p) = conjg(v(p,q)) v(p,q) = ctmp end do end do call stdlib${ii}$_${ci}$lapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). do p = 1, n u(p,p) = conjg(u(p,p)) do q = p + 1, n ctmp = conjg(u(q,p)) u(q,p) = conjg(u(p,q)) u(p,q) = ctmp end do end do if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ci}$laset('A',m-n,n,czero,czero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_${ci}$laset('A',n,n1-n,czero,czero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_${ci}$laset('A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) end if end if else ! .. copy r**h into [u] and overwrite [u] with the right ! singular vectors of r do p = 1, nr do q = p, n u(q,nr+p) = conjg(a(p,q)) end do end do if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset('U',nr-1,nr-1,czero,czero,u(1_${ik}$,nr+2),ldu) call stdlib${ii}$_${ci}$geqrf( n, nr, u(1_${ik}$,nr+1), ldu, cwork(n+1),cwork(n+nr+1), & lcwork-n-nr, ierr ) do p = 1, nr do q = 1, n v(q,p) = conjg(u(p,nr+q)) end do end do if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset('U',nr-1,nr-1,czero,czero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_${ci}$gesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v,ldv, cwork(n+nr+& 1_${ik}$),lcwork-n-nr,rwork, info ) call stdlib${ii}$_${ci}$laset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ci}$laset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ci}$laset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) call stdlib${ii}$_${ci}$unmqr('R','C', n, n, nr, u(1_${ik}$,nr+1), ldu,cwork(n+1),v,ldv,& cwork(n+nr+1),lcwork-n-nr,ierr) call stdlib${ii}$_${ci}$lapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ci}$laset('A',m-nr,nr,czero,czero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ci}$laset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) end if end if end if end if else ! .. apply stdlib${ii}$_${ci}$gesvd to r [[this is the recommended option]] if ( wntvr .or. ( nr == n ) ) then ! .. copy r into [v] and overwrite v with the right singular vectors call stdlib${ii}$_${ci}$lacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset( 'L', nr-1,nr-1, czero,czero, v(2_${ik}$,1_${ik}$), ldv ) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_${ci}$gesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_${ci}$lapmt( .false., nr, n, v, ldv, iwork ) ! .. now [v](1:nr,1:n) contains v(1:n,1:nr)**h ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ci}$laset('A', m-nr,nr, czero,czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ci}$laset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if else ! .. need all n right singular vectors and nr < n ! The Requested Number Of The Left Singular Vectors ! is then n1 (n or m) ! [[the optimal ratio n/nr for using lq instead of padding ! with zeros. here hard coded to 2; it must be at least ! two due to work space constraints.]] ! optratio = stdlib${ii}$_ilaenv(6, 'zgesvd', 's' // 'o', nr,n,0,0) ! optratio = max( optratio, 2 ) optratio = 2_${ik}$ if ( optratio * nr > n ) then call stdlib${ii}$_${ci}$lacpy( 'U', nr, n, a, lda, v, ldv ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset('L', nr-1,nr-1, czero,czero, v(2_${ik}$,1_${ik}$),ldv) ! .. the right singular vectors of r overwrite [v], the nr left ! singular vectors of r stored in [u](1:nr,1:nr) call stdlib${ii}$_${ci}$laset('A', n-nr,n, czero,czero, v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ci}$gesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,v, ldv, cwork(n+1), & lcwork-n, rwork, info ) call stdlib${ii}$_${ci}$lapmt( .false., n, n, v, ldv, iwork ) ! .. now [v] contains the adjoint of the matrix of the right ! singular vectors of a. the leading n left singular vectors ! are in [u](1:n,1:n) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x n1), i.e. (m x n) or (m x m). if ( ( n < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ci}$laset('A',m-n,n,czero,czero,u(n+1,1_${ik}$),ldu) if ( n < n1 ) then call stdlib${ii}$_${ci}$laset('A',n,n1-n,czero,czero,u(1_${ik}$,n+1),ldu) call stdlib${ii}$_${ci}$laset( 'A',m-n,n1-n,czero,cone,u(n+1,n+1), ldu ) end if end if else call stdlib${ii}$_${ci}$lacpy( 'U', nr, n, a, lda, u(nr+1,1_${ik}$), ldu ) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset('L',nr-1,nr-1,czero,czero,u(nr+2,1_${ik}$),ldu) call stdlib${ii}$_${ci}$gelqf( nr, n, u(nr+1,1_${ik}$), ldu, cwork(n+1),cwork(n+nr+1), & lcwork-n-nr, ierr ) call stdlib${ii}$_${ci}$lacpy('L',nr,nr,u(nr+1,1_${ik}$),ldu,v,ldv) if ( nr > 1_${ik}$ )call stdlib${ii}$_${ci}$laset('U',nr-1,nr-1,czero,czero,v(1_${ik}$,2_${ik}$),ldv) call stdlib${ii}$_${ci}$gesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,v, ldv, cwork(n+nr+& 1_${ik}$), lcwork-n-nr, rwork, info ) call stdlib${ii}$_${ci}$laset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ci}$laset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ci}$laset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) call stdlib${ii}$_${ci}$unmlq('R','N',n,n,nr,u(nr+1,1_${ik}$),ldu,cwork(n+1),v, ldv, cwork(n+& nr+1),lcwork-n-nr,ierr) call stdlib${ii}$_${ci}$lapmt( .false., n, n, v, ldv, iwork ) ! Assemble The Left Singular Vector Matrix U Of Dimensions ! (m x nr) or (m x n) or (m x m). if ( ( nr < m ) .and. .not.(wntuf)) then call stdlib${ii}$_${ci}$laset('A',m-nr,nr,czero,czero,u(nr+1,1_${ik}$),ldu) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ci}$laset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1), ldu ) end if end if end if end if ! .. end of the "r**h or r" branch end if ! the q matrix from the first qrf is built into the left singular ! vectors matrix u. if ( .not. wntuf )call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, & cwork(n+1), lcwork-n, ierr ) if ( rowprm .and. .not.wntuf )call stdlib${ii}$_${ci}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(n+1), -& 1_${ik}$ ) ! ... end of the "full svd" branch end if ! check whether some singular values are returned as zeros, e.g. ! due to underflow, and update the numerical rank. p = nr do q = p, 1, -1 if ( s(q) > zero ) go to 4002 nr = nr - 1_${ik}$ end do 4002 continue ! .. if numerical rank deficiency is detected, the truncated ! singular values are set to zero. if ( nr < n ) call stdlib${ii}$_${c2ri(ci)}$laset( 'G', n-nr,1_${ik}$, zero,zero, s(nr+1), n ) ! .. undo scaling; this may cause overflow in the largest singular ! values. if ( ascaled )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G',0_${ik}$,0_${ik}$, one,sqrt(real(m,KIND=${ck}$)), nr,1_${ik}$, s, n, ierr & ) if ( conda ) rwork(1_${ik}$) = sconda rwork(2_${ik}$) = p - nr ! .. p-nr is the number of singular values that are computed as ! exact zeros in stdlib${ii}$_${ci}$gesvd() applied to the (possibly truncated) ! full row rank triangular (trapezoidal) factor of a. numrank = nr return end subroutine stdlib${ii}$_${ci}$gesvdq #:endif #:endfor #:endfor end submodule stdlib_lapack_eigv_svd_drivers