#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_drivers2 implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) !! SGESDD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and right singular !! vectors. If singular vectors are desired, it uses a !! divide-and-conquer algorithm. !! The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**T, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl integer(${ik}$) :: lwork_sgebrd_mn, lwork_sgebrd_mm, lwork_sgebrd_nn, lwork_sgelqf_mn, & lwork_sgeqrf_mn, lwork_sorgbr_p_mm, lwork_sorgbr_q_nn, lwork_sorglq_mn, & lwork_sorglq_nn, lwork_sorgqr_mm, lwork_sorgqr_mn, lwork_sormbr_prt_mm, & lwork_sormbr_qln_mm, lwork_sormbr_prt_mn, lwork_sormbr_qln_mn, lwork_sormbr_prt_nn, & lwork_sormbr_qln_nn real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv. if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ bdspac = 0_${ik}$ mnthr = int( minmn*11.0_sp / 6.0_sp,KIND=${ik}$) if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsdc if( wntqn ) then ! stdlib${ii}$_sbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_sp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*n else bdspac = 3_${ik}$*n*n + 4_${ik}$*n end if ! compute space preferred for each routine call stdlib${ii}$_sgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_sgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgebrd( n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_sgebrd_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgeqrf( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgeqrf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgbr( 'Q', n, n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$,ierr ) lwork_sorgbr_q_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, n, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, n, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) maxwrk = max( wrkbl, bdspac + n ) minwrk = bdspac + n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, n + lwork_sorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + 2_${ik}$*n*n minwrk = bdspac + 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, n + lwork_sorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = bdspac + n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, n + lwork_sorgqr_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = n*n + max( 3_${ik}$*n + bdspac, n + m ) end if else ! path 5 (m >= n, but not much larger) wrkbl = 3_${ik}$*n + lwork_sgebrd_mn if( wntqn ) then ! path 5n (m >= n, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*n + max( m, n*n + bdspac ) else if( wntqs ) then ! path 5s (m >= n, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) end if end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsdc if( wntqn ) then ! stdlib${ii}$_sbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_sp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*m else bdspac = 3_${ik}$*m*m + 4_${ik}$*m end if ! compute space preferred for each routine call stdlib${ii}$_sgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_sgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgebrd( m, m, a, m, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgelqf( m, n, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgelqf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( m, n, m, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgbr( 'P', m, m, m, a, n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, n, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, m, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) maxwrk = max( wrkbl, bdspac + m ) minwrk = bdspac + m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, m + lwork_sorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + 2_${ik}$*m*m minwrk = bdspac + 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, m + lwork_sorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = bdspac + m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, m + lwork_sorglq_nn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = m*m + max( 3_${ik}$*m + bdspac, m + n ) end if else ! path 5t (n > m, but not much larger) wrkbl = 3_${ik}$*m + lwork_sgebrd_mn if( wntqn ) then ! path 5tn (n > m, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqo ) then ! path 5to (n > m, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*m + max( n, m*m + bdspac ) else if( wntqs ) then ! path 5ts (n > m, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) end if end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = stdlib${ii}$_sroundup_lwork( maxwrk ) if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_sisnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! workspace: need n [tau] + n [work] ! workspace: prefer n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + n ! perform bidiagonal svd, computing singular values only ! workspace: need n [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz = 'o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is ldwrkr by n if( lwork >= lda*n + n*n + 3_${ik}$*n + bdspac ) then ldwrkr = lda else ldwrkr = ( lwork - n*n - 3_${ik}$*n - bdspac ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! work(iu) is n by n iu = nwork nwork = iu + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r ! and vt by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), n, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu ), & n, zero, work( ir ),ldwrkr ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagoal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! workspace: need n*n [r] call stdlib${ii}$_slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! workspace: need n*n [u] + n [tau] + n [work] ! workspace: prefer n*n [u] + n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! workspace: need n*n [u] + n [tau] + m [work] ! workspace: prefer n*n [u] + n [tau] + m*nb [work] call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce r in a, zeroing out other entries if (n>1_${ik}$) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! workspace: need n*n [u] call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr ! path 5 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >= n, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') iu = nwork if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! work( iu ) is m by n ldwrku = m nwork = iu + ldwrku*n call stdlib${ii}$_slaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) ! ir is unused; silence compile warnings ir = -1_${ik}$ else ! work( iu ) is n by n ldwrku = n nwork = iu + ldwrku*n ! work(ir) is ldwrkr by n ir = nwork ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & dum, idum, work( nwork ),iwork, info ) ! overwrite vt by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! path 5o-fast ! overwrite work(iu) by left singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy left singular vectors of a from work(iu) to a call stdlib${ii}$_slacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 5o-slow ! generate q in a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of ! bidiagonal matrix in work(iu), storing result in ! work(ir) and copying to a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu )& , ldwrku, zero,work( ir ), ldwrkr ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 5s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', m, n, zero, zero, u, ldu ) call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', m, m, zero, zero, u, ldu ) call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of u to identity matrix if( m>n ) then call stdlib${ii}$_slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! workspace: need m [tau] + m [work] ! workspace: prefer m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + m ! perform bidiagonal svd, computing singular values only ! workspace: need m [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ! work(il) is m by m; it is later resized to m by chunk for gemm il = ivt + m*m if( lwork >= m*n + m*m + 3_${ik}$*m + bdspac ) then ldwrkl = m chunk = n else ldwrkl = m chunk = ( lwork - m*m ) / m end if itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_slaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u, and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & idum, work( nwork ),iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), m,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by q ! in a, storing result in work(il) and copying to a ! workspace: need m*m [vt] + m*m [l] ! workspace: prefer m*m [vt] + m*n [l] ! at this point, l is resized as m by chunk. do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1_${ik}$, i ), lda,& zero, work( il ), ldwrkl ) call stdlib${ii}$_slacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_slaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(iu). ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of l and vt ! by right singular vectors of l ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(il) by ! q in a, storing result in vt ! workspace: need m*m [l] call stdlib${ii}$_slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! workspace: need m*m [vt] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! workspace: need m*m [vt] + m [tau] + n [work] ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce l in a, zeroing out other entries if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! workspace: need m*m [vt] call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr ! path 5t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n > m, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! work( ivt ) is m by n call stdlib${ii}$_slaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n ! il is unused; silence compile warnings il = -1_${ik}$ else ! work( ivt ) is m by m nwork = ivt + ldwkvt*m il = nwork ! work(il) is m by chunk chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! path 5to-fast ! overwrite work(ivt) by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy right singular vectors of a from work(ivt) to a call stdlib${ii}$_slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 5to-slow ! generate p**t in a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by right singular vectors of ! bidiagonal matrix in work(ivt), storing result in ! work(il) and copying to a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1_${ik}$, & i ), lda, zero,work( il ), m ) call stdlib${ii}$_slacpy( 'F', m, blk, work( il ), m, a( 1_${ik}$, i ),lda ) end do end if else if( wntqs ) then ! path 5ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', m, n, zero, zero, vt, ldvt ) call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', n, n, zero, zero, vt, ldvt ) call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of vt to identity matrix if( n>m ) then call stdlib${ii}$_slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_sroundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_sgesdd module subroutine stdlib${ii}$_dgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) !! DGESDD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and right singular !! vectors. If singular vectors are desired, it uses a !! divide-and-conquer algorithm. !! The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**T, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl integer(${ik}$) :: lwork_dgebrd_mn, lwork_dgebrd_mm, lwork_dgebrd_nn, lwork_dgelqf_mn, & lwork_dgeqrf_mn, lwork_dorgbr_p_mm, lwork_dorgbr_q_nn, lwork_dorglq_mn, & lwork_dorglq_nn, lwork_dorgqr_mm, lwork_dorgqr_mn, lwork_dormbr_prt_mm, & lwork_dormbr_qln_mm, lwork_dormbr_prt_mn, lwork_dormbr_qln_mn, lwork_dormbr_prt_nn, & lwork_dormbr_qln_nn real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv. if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ bdspac = 0_${ik}$ mnthr = int( minmn*11.0_dp / 6.0_dp,KIND=${ik}$) if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsdc if( wntqn ) then ! stdlib${ii}$_dbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_dp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*n else bdspac = 3_${ik}$*n*n + 4_${ik}$*n end if ! compute space preferred for each routine call stdlib${ii}$_dgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_dgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgebrd( n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_dgebrd_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgeqrf( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgeqrf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgbr( 'Q', n, n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$,ierr ) lwork_dorgbr_q_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, n, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, n, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) maxwrk = max( wrkbl, bdspac + n ) minwrk = bdspac + n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, n + lwork_dorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + 2_${ik}$*n*n minwrk = bdspac + 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, n + lwork_dorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = bdspac + n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, n + lwork_dorgqr_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = n*n + max( 3_${ik}$*n + bdspac, n + m ) end if else ! path 5 (m >= n, but not much larger) wrkbl = 3_${ik}$*n + lwork_dgebrd_mn if( wntqn ) then ! path 5n (m >= n, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*n + max( m, n*n + bdspac ) else if( wntqs ) then ! path 5s (m >= n, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) end if end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsdc if( wntqn ) then ! stdlib${ii}$_dbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_dp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*m else bdspac = 3_${ik}$*m*m + 4_${ik}$*m end if ! compute space preferred for each routine call stdlib${ii}$_dgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_dgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgebrd( m, m, a, m, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgelqf( m, n, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgelqf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( m, n, m, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgbr( 'P', m, m, m, a, n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, n, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, m, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) maxwrk = max( wrkbl, bdspac + m ) minwrk = bdspac + m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, m + lwork_dorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + 2_${ik}$*m*m minwrk = bdspac + 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, m + lwork_dorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = bdspac + m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, m + lwork_dorglq_nn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = m*m + max( 3_${ik}$*m + bdspac, m + n ) end if else ! path 5t (n > m, but not much larger) wrkbl = 3_${ik}$*m + lwork_dgebrd_mn if( wntqn ) then ! path 5tn (n > m, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqo ) then ! path 5to (n > m, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*m + max( n, m*m + bdspac ) else if( wntqs ) then ! path 5ts (n > m, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) end if end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = stdlib${ii}$_droundup_lwork( maxwrk ) if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_disnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! workspace: need n [tau] + n [work] ! workspace: prefer n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + n ! perform bidiagonal svd, computing singular values only ! workspace: need n [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz = 'o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is ldwrkr by n if( lwork >= lda*n + n*n + 3_${ik}$*n + bdspac ) then ldwrkr = lda else ldwrkr = ( lwork - n*n - 3_${ik}$*n - bdspac ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! work(iu) is n by n iu = nwork nwork = iu + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r ! and vt by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), n, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu ), & n, zero, work( ir ),ldwrkr ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagoal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! workspace: need n*n [r] call stdlib${ii}$_dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! workspace: need n*n [u] + n [tau] + n [work] ! workspace: prefer n*n [u] + n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! workspace: need n*n [u] + n [tau] + m [work] ! workspace: prefer n*n [u] + n [tau] + m*nb [work] call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce r in a, zeroing out other entries if (n>1_${ik}$) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! workspace: need n*n [u] call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr ! path 5 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >= n, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') iu = nwork if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! work( iu ) is m by n ldwrku = m nwork = iu + ldwrku*n call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) ! ir is unused; silence compile warnings ir = -1_${ik}$ else ! work( iu ) is n by n ldwrku = n nwork = iu + ldwrku*n ! work(ir) is ldwrkr by n ir = nwork ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & dum, idum, work( nwork ),iwork, info ) ! overwrite vt by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! path 5o-fast ! overwrite work(iu) by left singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy left singular vectors of a from work(iu) to a call stdlib${ii}$_dlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 5o-slow ! generate q in a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of ! bidiagonal matrix in work(iu), storing result in ! work(ir) and copying to a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu )& , ldwrku, zero,work( ir ), ldwrkr ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 5s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, u, ldu ) call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', m, m, zero, zero, u, ldu ) call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of u to identity matrix if( m>n ) then call stdlib${ii}$_dlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! workspace: need m [tau] + m [work] ! workspace: prefer m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + m ! perform bidiagonal svd, computing singular values only ! workspace: need m [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ! work(il) is m by m; it is later resized to m by chunk for gemm il = ivt + m*m if( lwork >= m*n + m*m + 3_${ik}$*m + bdspac ) then ldwrkl = m chunk = n else ldwrkl = m chunk = ( lwork - m*m ) / m end if itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_dlaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u, and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & idum, work( nwork ),iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), m,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by q ! in a, storing result in work(il) and copying to a ! workspace: need m*m [vt] + m*m [l] ! workspace: prefer m*m [vt] + m*n [l] ! at this point, l is resized as m by chunk. do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1_${ik}$, i ), lda,& zero, work( il ), ldwrkl ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_dlaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(iu). ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of l and vt ! by right singular vectors of l ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(il) by ! q in a, storing result in vt ! workspace: need m*m [l] call stdlib${ii}$_dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! workspace: need m*m [vt] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! workspace: need m*m [vt] + m [tau] + n [work] ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce l in a, zeroing out other entries if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! workspace: need m*m [vt] call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr ! path 5t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n > m, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! work( ivt ) is m by n call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n ! il is unused; silence compile warnings il = -1_${ik}$ else ! work( ivt ) is m by m nwork = ivt + ldwkvt*m il = nwork ! work(il) is m by chunk chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! path 5to-fast ! overwrite work(ivt) by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy right singular vectors of a from work(ivt) to a call stdlib${ii}$_dlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 5to-slow ! generate p**t in a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by right singular vectors of ! bidiagonal matrix in work(ivt), storing result in ! work(il) and copying to a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1_${ik}$, & i ), lda, zero,work( il ), m ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( il ), m, a( 1_${ik}$, i ),lda ) end do end if else if( wntqs ) then ! path 5ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, vt, ldvt ) call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', n, n, zero, zero, vt, ldvt ) call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of vt to identity matrix if( n>m ) then call stdlib${ii}$_dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_droundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_dgesdd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) !! DGESDD: computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and right singular !! vectors. If singular vectors are desired, it uses a !! divide-and-conquer algorithm. !! The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**T, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl integer(${ik}$) :: lwork_qgebrd_mn, lwork_qgebrd_mm, lwork_qgebrd_nn, lwork_qgelqf_mn, & lwork_qgeqrf_mn, lwork_qorgbr_p_mm, lwork_qorgbr_q_nn, lwork_qorglq_mn, & lwork_qorglq_nn, lwork_qorgqr_mm, lwork_qorgqr_mn, lwork_qormbr_prt_mm, & lwork_qormbr_qln_mm, lwork_qormbr_prt_mn, lwork_qormbr_qln_mn, lwork_qormbr_prt_nn, & lwork_qormbr_qln_nn real(${rk}$) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(${rk}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv. if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ bdspac = 0_${ik}$ mnthr = int( minmn*11.0_${rk}$ / 6.0_${rk}$,KIND=${ik}$) if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsdc if( wntqn ) then ! stdlib${ii}$_${ri}$bdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_${rk}$) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*n else bdspac = 3_${ik}$*n*n + 4_${ik}$*n end if ! compute space preferred for each routine call stdlib${ii}$_${ri}$gebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_qgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gebrd( n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_qgebrd_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$geqrf( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgeqrf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$,ierr ) lwork_qorgbr_q_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgqr( m, m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgqr( m, n, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, n, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) maxwrk = max( wrkbl, bdspac + n ) minwrk = bdspac + n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, n + lwork_qorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + 2_${ik}$*n*n minwrk = bdspac + 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, n + lwork_qorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = bdspac + n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, n + lwork_qorgqr_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = n*n + max( 3_${ik}$*n + bdspac, n + m ) end if else ! path 5 (m >= n, but not much larger) wrkbl = 3_${ik}$*n + lwork_qgebrd_mn if( wntqn ) then ! path 5n (m >= n, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*n + max( m, n*n + bdspac ) else if( wntqs ) then ! path 5s (m >= n, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) end if end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsdc if( wntqn ) then ! stdlib${ii}$_${ri}$bdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_${rk}$) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*m else bdspac = 3_${ik}$*m*m + 4_${ik}$*m end if ! compute space preferred for each routine call stdlib${ii}$_${ri}$gebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_qgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gebrd( m, m, a, m, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gelqf( m, n, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgelqf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orglq( m, n, m, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, n, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, m, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) maxwrk = max( wrkbl, bdspac + m ) minwrk = bdspac + m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, m + lwork_qorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + 2_${ik}$*m*m minwrk = bdspac + 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, m + lwork_qorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = bdspac + m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, m + lwork_qorglq_nn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = m*m + max( 3_${ik}$*m + bdspac, m + n ) end if else ! path 5t (n > m, but not much larger) wrkbl = 3_${ik}$*m + lwork_qgebrd_mn if( wntqn ) then ! path 5tn (n > m, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqo ) then ! path 5to (n > m, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*m + max( n, m*m + bdspac ) else if( wntqs ) then ! path 5ts (n > m, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) end if end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = stdlib${ii}$_${ri}$roundup_lwork( maxwrk ) if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${ri}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${ri}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_${ri}$isnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! workspace: need n [tau] + n [work] ! workspace: prefer n [tau] + n*nb [work] call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + n ! perform bidiagonal svd, computing singular values only ! workspace: need n [e] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz = 'o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is ldwrkr by n if( lwork >= lda*n + n*n + 3_${ik}$*n + bdspac ) then ldwrkr = lda else ldwrkr = ( lwork - n*n - 3_${ik}$*n - bdspac ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! work(iu) is n by n iu = nwork nwork = iu + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r ! a