#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_drivers2 implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_sgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) !! SGESDD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and right singular !! vectors. If singular vectors are desired, it uses a !! divide-and-conquer algorithm. !! The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**T, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl integer(${ik}$) :: lwork_sgebrd_mn, lwork_sgebrd_mm, lwork_sgebrd_nn, lwork_sgelqf_mn, & lwork_sgeqrf_mn, lwork_sorgbr_p_mm, lwork_sorgbr_q_nn, lwork_sorglq_mn, & lwork_sorglq_nn, lwork_sorgqr_mm, lwork_sorgqr_mn, lwork_sormbr_prt_mm, & lwork_sormbr_qln_mm, lwork_sormbr_prt_mn, lwork_sormbr_qln_mn, lwork_sormbr_prt_nn, & lwork_sormbr_qln_nn real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(sp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv. if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ bdspac = 0_${ik}$ mnthr = int( minmn*11.0_sp / 6.0_sp,KIND=${ik}$) if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsdc if( wntqn ) then ! stdlib${ii}$_sbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_sp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*n else bdspac = 3_${ik}$*n*n + 4_${ik}$*n end if ! compute space preferred for each routine call stdlib${ii}$_sgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_sgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgebrd( n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_sgebrd_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgeqrf( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgeqrf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgbr( 'Q', n, n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$,ierr ) lwork_sorgbr_q_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgqr( m, n, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgqr_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, n, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) maxwrk = max( wrkbl, bdspac + n ) minwrk = bdspac + n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, n + lwork_sorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + 2_${ik}$*n*n minwrk = bdspac + 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, n + lwork_sorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = bdspac + n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_sgeqrf_mn wrkbl = max( wrkbl, n + lwork_sorgqr_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = n*n + max( 3_${ik}$*n + bdspac, n + m ) end if else ! path 5 (m >= n, but not much larger) wrkbl = 3_${ik}$*n + lwork_sgebrd_mn if( wntqn ) then ! path 5n (m >= n, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*n + max( m, n*n + bdspac ) else if( wntqs ) then ! path 5s (m >= n, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_sormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) end if end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_sbdsdc if( wntqn ) then ! stdlib${ii}$_sbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_sp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*m else bdspac = 3_${ik}$*m*m + 4_${ik}$*m end if ! compute space preferred for each routine call stdlib${ii}$_sgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_sgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgebrd( m, m, a, m, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgebrd_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sgelqf( m, n, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sgelqf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorglq( m, n, m, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorglq_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sorgbr( 'P', m, m, m, a, n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_sorgbr_p_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, n, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, m, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_sormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) maxwrk = max( wrkbl, bdspac + m ) minwrk = bdspac + m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, m + lwork_sorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + 2_${ik}$*m*m minwrk = bdspac + 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, m + lwork_sorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = bdspac + m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_sgelqf_mn wrkbl = max( wrkbl, m + lwork_sorglq_nn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = m*m + max( 3_${ik}$*m + bdspac, m + n ) end if else ! path 5t (n > m, but not much larger) wrkbl = 3_${ik}$*m + lwork_sgebrd_mn if( wntqn ) then ! path 5tn (n > m, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqo ) then ! path 5to (n > m, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*m + max( n, m*m + bdspac ) else if( wntqs ) then ! path 5ts (n > m, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_mn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_sormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) end if end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = stdlib${ii}$_sroundup_lwork( maxwrk ) if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_sisnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! workspace: need n [tau] + n [work] ! workspace: prefer n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + n ! perform bidiagonal svd, computing singular values only ! workspace: need n [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz = 'o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is ldwrkr by n if( lwork >= lda*n + n*n + 3_${ik}$*n + bdspac ) then ldwrkr = lda else ldwrkr = ( lwork - n*n - 3_${ik}$*n - bdspac ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! work(iu) is n by n iu = nwork nwork = iu + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r ! and vt by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), n, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu ), & n, zero, work( ir ),ldwrkr ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_slaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagoal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! workspace: need n*n [r] call stdlib${ii}$_slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! workspace: need n*n [u] + n [tau] + n [work] ! workspace: prefer n*n [u] + n [tau] + n*nb [work] call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! workspace: need n*n [u] + n [tau] + m [work] ! workspace: prefer n*n [u] + n [tau] + m*nb [work] call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce r in a, zeroing out other entries if (n>1_${ik}$) call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! workspace: need n*n [u] call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr ! path 5 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >= n, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') iu = nwork if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! work( iu ) is m by n ldwrku = m nwork = iu + ldwrku*n call stdlib${ii}$_slaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) ! ir is unused; silence compile warnings ir = -1_${ik}$ else ! work( iu ) is n by n ldwrku = n nwork = iu + ldwrku*n ! work(ir) is ldwrkr by n ir = nwork ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & dum, idum, work( nwork ),iwork, info ) ! overwrite vt by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! path 5o-fast ! overwrite work(iu) by left singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy left singular vectors of a from work(iu) to a call stdlib${ii}$_slacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 5o-slow ! generate q in a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of ! bidiagonal matrix in work(iu), storing result in ! work(ir) and copying to a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu )& , ldwrku, zero,work( ir ), ldwrkr ) call stdlib${ii}$_slacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 5s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', m, n, zero, zero, u, ldu ) call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', m, m, zero, zero, u, ldu ) call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of u to identity matrix if( m>n ) then call stdlib${ii}$_slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! workspace: need m [tau] + m [work] ! workspace: prefer m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + m ! perform bidiagonal svd, computing singular values only ! workspace: need m [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ! work(il) is m by m; it is later resized to m by chunk for gemm il = ivt + m*m if( lwork >= m*n + m*m + 3_${ik}$*m + bdspac ) then ldwrkl = m chunk = n else ldwrkl = m chunk = ( lwork - m*m ) / m end if itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_slaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u, and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & idum, work( nwork ),iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), m,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by q ! in a, storing result in work(il) and copying to a ! workspace: need m*m [vt] + m*m [l] ! workspace: prefer m*m [vt] + m*n [l] ! at this point, l is resized as m by chunk. do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1_${ik}$, i ), lda,& zero, work( il ), ldwrkl ) call stdlib${ii}$_slacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_slaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(iu). ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of l and vt ! by right singular vectors of l ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(il) by ! q in a, storing result in vt ! workspace: need m*m [l] call stdlib${ii}$_slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! workspace: need m*m [vt] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! workspace: need m*m [vt] + m [tau] + n [work] ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce l in a, zeroing out other entries if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! workspace: need m*m [vt] call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr ! path 5t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n > m, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! work( ivt ) is m by n call stdlib${ii}$_slaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n ! il is unused; silence compile warnings il = -1_${ik}$ else ! work( ivt ) is m by m nwork = ivt + ldwkvt*m il = nwork ! work(il) is m by chunk chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! path 5to-fast ! overwrite work(ivt) by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy right singular vectors of a from work(ivt) to a call stdlib${ii}$_slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 5to-slow ! generate p**t in a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by right singular vectors of ! bidiagonal matrix in work(ivt), storing result in ! work(il) and copying to a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1_${ik}$, & i ), lda, zero,work( il ), m ) call stdlib${ii}$_slacpy( 'F', m, blk, work( il ), m, a( 1_${ik}$, i ),lda ) end do end if else if( wntqs ) then ! path 5ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', m, n, zero, zero, vt, ldvt ) call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_slaset( 'F', n, n, zero, zero, vt, ldvt ) call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of vt to identity matrix if( n>m ) then call stdlib${ii}$_slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] call stdlib${ii}$_sormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_sormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_sroundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_sgesdd module subroutine stdlib${ii}$_dgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) !! DGESDD computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and right singular !! vectors. If singular vectors are desired, it uses a !! divide-and-conquer algorithm. !! The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**T, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl integer(${ik}$) :: lwork_dgebrd_mn, lwork_dgebrd_mm, lwork_dgebrd_nn, lwork_dgelqf_mn, & lwork_dgeqrf_mn, lwork_dorgbr_p_mm, lwork_dorgbr_q_nn, lwork_dorglq_mn, & lwork_dorglq_nn, lwork_dorgqr_mm, lwork_dorgqr_mn, lwork_dormbr_prt_mm, & lwork_dormbr_qln_mm, lwork_dormbr_prt_mn, lwork_dormbr_qln_mn, lwork_dormbr_prt_nn, & lwork_dormbr_qln_nn real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(dp) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv. if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ bdspac = 0_${ik}$ mnthr = int( minmn*11.0_dp / 6.0_dp,KIND=${ik}$) if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsdc if( wntqn ) then ! stdlib${ii}$_dbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_dp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*n else bdspac = 3_${ik}$*n*n + 4_${ik}$*n end if ! compute space preferred for each routine call stdlib${ii}$_dgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_dgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgebrd( n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_dgebrd_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgeqrf( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgeqrf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgbr( 'Q', n, n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$,ierr ) lwork_dorgbr_q_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgqr( m, n, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgqr_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, n, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) maxwrk = max( wrkbl, bdspac + n ) minwrk = bdspac + n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, n + lwork_dorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + 2_${ik}$*n*n minwrk = bdspac + 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, n + lwork_dorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = bdspac + n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_dgeqrf_mn wrkbl = max( wrkbl, n + lwork_dorgqr_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = n*n + max( 3_${ik}$*n + bdspac, n + m ) end if else ! path 5 (m >= n, but not much larger) wrkbl = 3_${ik}$*n + lwork_dgebrd_mn if( wntqn ) then ! path 5n (m >= n, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*n + max( m, n*n + bdspac ) else if( wntqs ) then ! path 5s (m >= n, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) end if end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_dbdsdc if( wntqn ) then ! stdlib${ii}$_dbdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_dp) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*m else bdspac = 3_${ik}$*m*m + 4_${ik}$*m end if ! compute space preferred for each routine call stdlib${ii}$_dgebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_dgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgebrd( m, m, a, m, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgebrd_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dgelqf( m, n, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dgelqf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorglq( m, n, m, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorglq_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dorgbr( 'P', m, m, m, a, n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_dorgbr_p_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, n, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, m, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_dormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) maxwrk = max( wrkbl, bdspac + m ) minwrk = bdspac + m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, m + lwork_dorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + 2_${ik}$*m*m minwrk = bdspac + 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, m + lwork_dorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = bdspac + m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_dgelqf_mn wrkbl = max( wrkbl, m + lwork_dorglq_nn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = m*m + max( 3_${ik}$*m + bdspac, m + n ) end if else ! path 5t (n > m, but not much larger) wrkbl = 3_${ik}$*m + lwork_dgebrd_mn if( wntqn ) then ! path 5tn (n > m, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqo ) then ! path 5to (n > m, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*m + max( n, m*m + bdspac ) else if( wntqs ) then ! path 5ts (n > m, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_mn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_dormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) end if end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = stdlib${ii}$_droundup_lwork( maxwrk ) if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_dlange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_disnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! workspace: need n [tau] + n [work] ! workspace: prefer n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + n ! perform bidiagonal svd, computing singular values only ! workspace: need n [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz = 'o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is ldwrkr by n if( lwork >= lda*n + n*n + 3_${ik}$*n + bdspac ) then ldwrkr = lda else ldwrkr = ( lwork - n*n - 3_${ik}$*n - bdspac ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! work(iu) is n by n iu = nwork nwork = iu + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r ! and vt by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), n, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu ), & n, zero, work( ir ),ldwrkr ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_dlaset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_dorgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagoal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! workspace: need n*n [r] call stdlib${ii}$_dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! workspace: need n*n [u] + n [tau] + n [work] ! workspace: prefer n*n [u] + n [tau] + n*nb [work] call stdlib${ii}$_dgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_dlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! workspace: need n*n [u] + n [tau] + m [work] ! workspace: prefer n*n [u] + n [tau] + m*nb [work] call stdlib${ii}$_dorgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce r in a, zeroing out other entries if (n>1_${ik}$) call stdlib${ii}$_dlaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! workspace: need n*n [u] call stdlib${ii}$_dgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr ! path 5 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >= n, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') iu = nwork if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! work( iu ) is m by n ldwrku = m nwork = iu + ldwrku*n call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, work( iu ),ldwrku ) ! ir is unused; silence compile warnings ir = -1_${ik}$ else ! work( iu ) is n by n ldwrku = n nwork = iu + ldwrku*n ! work(ir) is ldwrkr by n ir = nwork ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & dum, idum, work( nwork ),iwork, info ) ! overwrite vt by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! path 5o-fast ! overwrite work(iu) by left singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy left singular vectors of a from work(iu) to a call stdlib${ii}$_dlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 5o-slow ! generate q in a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of ! bidiagonal matrix in work(iu), storing result in ! work(ir) and copying to a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_dgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu )& , ldwrku, zero,work( ir ), ldwrkr ) call stdlib${ii}$_dlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 5s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, u, ldu ) call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', m, m, zero, zero, u, ldu ) call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of u to identity matrix if( m>n ) then call stdlib${ii}$_dlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! workspace: need m [tau] + m [work] ! workspace: prefer m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + m ! perform bidiagonal svd, computing singular values only ! workspace: need m [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ! work(il) is m by m; it is later resized to m by chunk for gemm il = ivt + m*m if( lwork >= m*n + m*m + 3_${ik}$*m + bdspac ) then ldwrkl = m chunk = n else ldwrkl = m chunk = ( lwork - m*m ) / m end if itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_dlaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u, and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & idum, work( nwork ),iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), m,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by q ! in a, storing result in work(il) and copying to a ! workspace: need m*m [vt] + m*m [l] ! workspace: prefer m*m [vt] + m*n [l] ! at this point, l is resized as m by chunk. do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1_${ik}$, i ), lda,& zero, work( il ), ldwrkl ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_dlaset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_dorglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(iu). ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of l and vt ! by right singular vectors of l ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(il) by ! q in a, storing result in vt ! workspace: need m*m [l] call stdlib${ii}$_dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! workspace: need m*m [vt] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] call stdlib${ii}$_dgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_dlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! workspace: need m*m [vt] + m [tau] + n [work] ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] call stdlib${ii}$_dorglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce l in a, zeroing out other entries if (m>1_${ik}$) call stdlib${ii}$_dlaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! workspace: need m*m [vt] call stdlib${ii}$_dgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_dlacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr ! path 5t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n > m, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! work( ivt ) is m by n call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n ! il is unused; silence compile warnings il = -1_${ik}$ else ! work( ivt ) is m by m nwork = ivt + ldwkvt*m il = nwork ! work(il) is m by chunk chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! path 5to-fast ! overwrite work(ivt) by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy right singular vectors of a from work(ivt) to a call stdlib${ii}$_dlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 5to-slow ! generate p**t in a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_dorgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by right singular vectors of ! bidiagonal matrix in work(ivt), storing result in ! work(il) and copying to a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_dgemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1_${ik}$, & i ), lda, zero,work( il ), m ) call stdlib${ii}$_dlacpy( 'F', m, blk, work( il ), m, a( 1_${ik}$, i ),lda ) end do end if else if( wntqs ) then ! path 5ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, vt, ldvt ) call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_dlaset( 'F', n, n, zero, zero, vt, ldvt ) call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of vt to identity matrix if( n>m ) then call stdlib${ii}$_dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] call stdlib${ii}$_dormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_dormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_droundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_dgesdd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$gesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, iwork, info ) !! DGESDD: computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, optionally computing the left and right singular !! vectors. If singular vectors are desired, it uses a !! divide-and-conquer algorithm. !! The SVD is written !! A = U * SIGMA * transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and !! V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**T, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, il, ir, iscl, itau, itaup, itauq, iu, & ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, nwork, wrkbl integer(${ik}$) :: lwork_qgebrd_mn, lwork_qgebrd_mm, lwork_qgebrd_nn, lwork_qgelqf_mn, & lwork_qgeqrf_mn, lwork_qorgbr_p_mm, lwork_qorgbr_q_nn, lwork_qorglq_mn, & lwork_qorglq_nn, lwork_qorgqr_mm, lwork_qorgqr_mn, lwork_qormbr_prt_mm, & lwork_qormbr_qln_mm, lwork_qormbr_prt_mn, lwork_qormbr_qln_mn, lwork_qormbr_prt_nn, & lwork_qormbr_qln_nn real(${rk}$) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(${rk}$) :: dum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! nb refers to the optimal block size for the immediately ! following subroutine, as returned by stdlib${ii}$_ilaenv. if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ bdspac = 0_${ik}$ mnthr = int( minmn*11.0_${rk}$ / 6.0_${rk}$,KIND=${ik}$) if( m>=n .and. minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsdc if( wntqn ) then ! stdlib${ii}$_${ri}$bdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_${rk}$) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*n else bdspac = 3_${ik}$*n*n + 4_${ik}$*n end if ! compute space preferred for each routine call stdlib${ii}$_${ri}$gebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_qgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gebrd( n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_qgebrd_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$geqrf( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgeqrf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgbr( 'Q', n, n, n, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$,ierr ) lwork_qorgbr_q_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgqr( m, m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgqr( m, n, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgqr_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', n, n, n, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, n, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, n, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) maxwrk = max( wrkbl, bdspac + n ) minwrk = bdspac + n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, n + lwork_qorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + 2_${ik}$*n*n minwrk = bdspac + 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, n + lwork_qorgqr_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = bdspac + n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_qgeqrf_mn wrkbl = max( wrkbl, n + lwork_qorgqr_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qgebrd_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + n*n minwrk = n*n + max( 3_${ik}$*n + bdspac, n + m ) end if else ! path 5 (m >= n, but not much larger) wrkbl = 3_${ik}$*n + lwork_qgebrd_mn if( wntqn ) then ! path 5n (m >= n, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*n + max( m, n*n + bdspac ) else if( wntqs ) then ! path 5s (m >= n, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_mn ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*n + lwork_qormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*n + bdspac ) minwrk = 3_${ik}$*n + max( m, bdspac ) end if end if else if( minmn>0_${ik}$ ) then ! compute space needed for stdlib${ii}$_${ri}$bdsdc if( wntqn ) then ! stdlib${ii}$_${ri}$bdsdc needs only 4*n (or 6*n for uplo=l for lapack <= 3.6_${rk}$) ! keep 7*n for backwards compatibility. bdspac = 7_${ik}$*m else bdspac = 3_${ik}$*m*m + 4_${ik}$*m end if ! compute space preferred for each routine call stdlib${ii}$_${ri}$gebrd( m, n, dum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, & ierr ) lwork_qgebrd_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gebrd( m, m, a, m, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgebrd_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$gelqf( m, n, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qgelqf_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orglq( m, n, m, a, m, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorglq_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$orgbr( 'P', m, m, m, a, n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr ) lwork_qorgbr_p_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_mm = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, n, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_mn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, m, dum(1_${ik}$), n,dum(1_${ik}$), dum(1_${ik}$), n, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_prt_nn = int( dum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, m, dum(1_${ik}$), m,dum(1_${ik}$), dum(1_${ik}$), m, dum(1_${ik}$), & -1_${ik}$, ierr ) lwork_qormbr_qln_mm = int( dum(1_${ik}$),KIND=${ik}$) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) maxwrk = max( wrkbl, bdspac + m ) minwrk = bdspac + m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, m + lwork_qorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + 2_${ik}$*m*m minwrk = bdspac + 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, m + lwork_qorglq_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = bdspac + m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_qgelqf_mn wrkbl = max( wrkbl, m + lwork_qorglq_nn ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qgebrd_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*m minwrk = m*m + max( 3_${ik}$*m + bdspac, m + n ) end if else ! path 5t (n > m, but not much larger) wrkbl = 3_${ik}$*m + lwork_qgebrd_mn if( wntqn ) then ! path 5tn (n > m, jobz='n') maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqo ) then ! path 5to (n > m, jobz='o') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mn ) wrkbl = max( wrkbl, 3_${ik}$*m + bdspac ) maxwrk = wrkbl + m*n minwrk = 3_${ik}$*m + max( n, m*m + bdspac ) else if( wntqs ) then ! path 5ts (n > m, jobz='s') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_mn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_qln_mm ) wrkbl = max( wrkbl, 3_${ik}$*m + lwork_qormbr_prt_nn ) maxwrk = max( wrkbl, 3_${ik}$*m + bdspac ) minwrk = 3_${ik}$*m + max( n, bdspac ) end if end if end if maxwrk = max( maxwrk, minwrk ) work( 1_${ik}$ ) = stdlib${ii}$_${ri}$roundup_lwork( maxwrk ) if( lwork<minwrk .and. .not.lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${ri}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${ri}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ri}$lange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_${ri}$isnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! workspace: need n [tau] + n [work] ! workspace: prefer n [tau] + n*nb [work] call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + n ! perform bidiagonal svd, computing singular values only ! workspace: need n [e] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz = 'o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is ldwrkr by n if( lwork >= lda*n + n*n + 3_${ik}$*n + bdspac ) then ldwrkr = lda else ldwrkr = ( lwork - n*n - 3_${ik}$*n - bdspac ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! work(iu) is n by n iu = nwork nwork = iu + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r ! and vt by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), n, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n*n [u] ! workspace: prefer m*n [r] + 3*n [e, tauq, taup] + n*n [u] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu ), & n, zero, work( ir ),ldwrkr ) call stdlib${ii}$_${ri}$lacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$laset( 'L', n - 1_${ik}$, n - 1_${ik}$, zero, zero, work(ir+1),ldwrkr ) ! generate q in a ! workspace: need n*n [r] + n [tau] + n [work] ! workspace: prefer n*n [r] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$orgqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_${ri}$gebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagoal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need n*n [r] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [r] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [r] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! workspace: need n*n [r] call stdlib${ii}$_${ri}$lacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),ldwrkr, zero, u,& ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! workspace: need n*n [u] + n [tau] + n [work] ! workspace: prefer n*n [u] + n [tau] + n*nb [work] call stdlib${ii}$_${ri}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! workspace: need n*n [u] + n [tau] + m [work] ! workspace: prefer n*n [u] + n [tau] + m*nb [work] call stdlib${ii}$_${ri}$orgqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce r in a, zeroing out other entries if (n>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ), lda ) ie = itau itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + 2*n*nb [work] call stdlib${ii}$_${ri}$gebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need n*n [u] + 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,vt, ldvt, dum, & idum, work( nwork ), iwork,info ) ! overwrite work(iu) by left singular vectors of r and vt ! by right singular vectors of r ! workspace: need n*n [u] + 3*n [e, tauq, taup] + n [work] ! workspace: prefer n*n [u] + 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! workspace: need n*n [u] call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),ldwrku, zero, a,& lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr ! path 5 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ie = 1_${ik}$ itauq = ie + n itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >= n, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'N', n, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5o (m >= n, jobz='o') iu = nwork if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! work( iu ) is m by n ldwrku = m nwork = iu + ldwrku*n call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, work( iu ),ldwrku ) ! ir is unused; silence compile warnings ir = -1_${ik}$ else ! work( iu ) is n by n ldwrku = n nwork = iu + ldwrku*n ! work(ir) is ldwrkr by n ir = nwork ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in work(iu) and computing right ! singular vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + n*n [u] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), work( iu ),ldwrku, vt, ldvt, & dum, idum, work( nwork ),iwork, info ) ! overwrite vt by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*n + bdspac ) then ! path 5o-fast ! overwrite work(iu) by left singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + m*n [u] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy left singular vectors of a from work(iu) to a call stdlib${ii}$_${ri}$lacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 5o-slow ! generate q in a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + n*nb [work] call stdlib${ii}$_${ri}$orgbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by left singular vectors of ! bidiagonal matrix in work(iu), storing result in ! work(ir) and copying to a ! workspace: need 3*n [e, tauq, taup] + n*n [u] + nb*n [r] ! workspace: prefer 3*n [e, tauq, taup] + n*n [u] + m*n [r] do i = 1, m, ldwrkr chunk = min( m - i + 1_${ik}$, ldwrkr ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( iu )& , ldwrku, zero,work( ir ), ldwrkr ) call stdlib${ii}$_${ri}$lacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 5s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, u, ldu ) call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + n [work] ! workspace: prefer 3*n [e, tauq, taup] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*n [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$laset( 'F', m, m, zero, zero, u, ldu ) call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of u to identity matrix if( m>n ) then call stdlib${ii}$_${ri}$laset( 'F', m - n, m - n, zero, one, u(n+1,n+1),ldu ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*n [e, tauq, taup] + m [work] ! workspace: prefer 3*n [e, tauq, taup] + m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! workspace: need m [tau] + m [work] ! workspace: prefer m [tau] + m*nb [work] call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) nwork = ie + m ! perform bidiagonal svd, computing singular values only ! workspace: need m [e] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ! work(il) is m by m; it is later resized to m by chunk for gemm il = ivt + m*m if( lwork >= m*n + m*m + 3_${ik}$*m + bdspac ) then ldwrkl = m chunk = n else ldwrkl = m chunk = ( lwork - m*m ) / m end if itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_${ri}$laset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u, and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), m, dum, & idum, work( nwork ),iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), m,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by q ! in a, storing result in work(il) and copying to a ! workspace: need m*m [vt] + m*m [l] ! workspace: prefer m*m [vt] + m*n [l] ! at this point, l is resized as m by chunk. do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, blk, m, one, work( ivt ), m,a( 1_${ik}$, i ), lda,& zero, work( il ), ldwrkl ) call stdlib${ii}$_${ri}$lacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_${ri}$lacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_${ri}$laset( 'U', m - 1_${ik}$, m - 1_${ik}$, zero, zero,work( il + ldwrkl ), ldwrkl & ) ! generate q in a ! workspace: need m*m [l] + m [tau] + m [work] ! workspace: prefer m*m [l] + m [tau] + m*nb [work] call stdlib${ii}$_${ri}$orglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(iu). ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_${ri}$gebrd( m, m, work( il ), ldwrkl, s, work( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need m*m [l] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of l and vt ! by right singular vectors of l ! workspace: need m*m [l] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [l] + 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(il) by ! q in a, storing result in vt ! workspace: need m*m [l] call stdlib${ii}$_${ri}$lacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,a, lda, zero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! workspace: need m*m [vt] + m [tau] + m [work] ! workspace: prefer m*m [vt] + m [tau] + m*nb [work] call stdlib${ii}$_${ri}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork - nwork + & 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! workspace: need m*m [vt] + m [tau] + n [work] ! workspace: prefer m*m [vt] + m [tau] + n*nb [work] call stdlib${ii}$_${ri}$orglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork - & nwork + 1_${ik}$, ierr ) ! produce l in a, zeroing out other entries if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda ) ie = itau itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup] + 2*m*nb [work] call stdlib${ii}$_${ri}$gebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need m*m [vt] + 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'U', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of l and work(ivt) ! by right singular vectors of l ! workspace: need m*m [vt] + 3*m [e, tauq, taup]+ m [work] ! workspace: prefer m*m [vt] + 3*m [e, tauq, taup]+ m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! workspace: need m*m [vt] call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,vt, ldvt, zero,& a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ri}$lacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr ! path 5t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ie = 1_${ik}$ itauq = ie + m itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + (m+n)*nb [work] call stdlib${ii}$_${ri}$gebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n > m, jobz='n') ! perform bidiagonal svd, only computing singular values ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'L', 'N', m, s, work( ie ), dum, 1_${ik}$, dum, 1_${ik}$,dum, idum, & work( nwork ), iwork, info ) else if( wntqo ) then ! path 5to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! work( ivt ) is m by n call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n ! il is unused; silence compile warnings il = -1_${ik}$ else ! work( ivt ) is m by m nwork = ivt + ldwkvt*m il = nwork ! work(il) is m by chunk chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in work(ivt) ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + bdspac call stdlib${ii}$_${ri}$bdsdc( 'L', 'I', m, s, work( ie ), u, ldu,work( ivt ), ldwkvt, & dum, idum,work( nwork ), iwork, info ) ! overwrite u by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) if( lwork >= m*n + 3_${ik}$*m + bdspac ) then ! path 5to-fast ! overwrite work(ivt) by left singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m*n [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*n [vt] + m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork - nwork + 1_${ik}$, ierr ) ! copy right singular vectors of a from work(ivt) to a call stdlib${ii}$_${ri}$lacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 5to-slow ! generate p**t in a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*nb [work] call stdlib${ii}$_${ri}$orgbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork - nwork + 1_${ik}$, ierr ) ! multiply q in a by right singular vectors of ! bidiagonal matrix in work(ivt), storing result in ! work(il) and copying to a ! workspace: need 3*m [e, tauq, taup] + m*m [vt] + m*nb [l] ! workspace: prefer 3*m [e, tauq, taup] + m*m [vt] + m*n [l] do i = 1, n, chunk blk = min( n - i + 1_${ik}$, chunk ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, blk, m, one, work( ivt ),ldwkvt, a( 1_${ik}$, & i ), lda, zero,work( il ), m ) call stdlib${ii}$_${ri}$lacpy( 'F', m, blk, work( il ), m, a( 1_${ik}$, i ),lda ) end do end if else if( wntqs ) then ! path 5ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, vt, ldvt ) call stdlib${ii}$_${ri}$bdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + m [work] ! workspace: prefer 3*m [e, tauq, taup] + m*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) else if( wntqa ) then ! path 5ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in u and computing right singular ! vectors of bidiagonal matrix in vt ! workspace: need 3*m [e, tauq, taup] + bdspac call stdlib${ii}$_${ri}$laset( 'F', n, n, zero, zero, vt, ldvt ) call stdlib${ii}$_${ri}$bdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,ldvt, dum, idum, & work( nwork ), iwork,info ) ! set the right corner of vt to identity matrix if( n>m ) then call stdlib${ii}$_${ri}$laset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),ldvt ) end if ! overwrite u by left singular vectors of a and vt ! by right singular vectors of a ! workspace: need 3*m [e, tauq, taup] + n [work] ! workspace: prefer 3*m [e, tauq, taup] + n*nb [work] call stdlib${ii}$_${ri}$ormbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) call stdlib${ii}$_${ri}$ormbr( 'P', 'R', 'T', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork - nwork + 1_${ik}$, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( anrm<smlnum )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_${ri}$roundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_${ri}$gesdd #:endif #:endfor module subroutine stdlib${ii}$_cgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, iwork, & !! CGESDD computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors, by using divide-and-conquer method. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**H, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(out) :: rwork(*), s(*) complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: blk, chunk, i, ie, ierr, il, ir, iru, irvt, iscl, itau, itaup, itauq, & iu, ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr1, mnthr2, nrwork,& nwork, wrkbl integer(${ik}$) :: lwork_cgebrd_mn, lwork_cgebrd_mm, lwork_cgebrd_nn, lwork_cgelqf_mn, & lwork_cgeqrf_mn, lwork_cungbr_p_mn, lwork_cungbr_p_nn, lwork_cungbr_q_mn, & lwork_cungbr_q_mm, lwork_cunglq_mn, lwork_cunglq_nn, lwork_cungqr_mm, lwork_cungqr_mn, & lwork_cunmbr_prc_mm, lwork_cunmbr_qln_mm, lwork_cunmbr_prc_mn, lwork_cunmbr_qln_mn, & lwork_cunmbr_prc_nn, lwork_cunmbr_qln_nn real(sp) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(sp) :: dum(1_${ik}$) complex(sp) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) mnthr1 = int( minmn*17.0_sp / 9.0_sp,KIND=${ik}$) mnthr2 = int( minmn*5.0_sp / 3.0_sp,KIND=${ik}$) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! there is no complex work space needed for bidiagonal svd ! the realwork space needed for bidiagonal svd (stdlib${ii}$_sbdsdc,KIND=sp) is ! bdspac = 3*n*n + 4*n for singular values and vectors; ! bdspac = 4*n for singular values only; ! not including e, ru, and rvt matrices. ! compute space preferred for each routine call stdlib${ii}$_cgebrd( m, n, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_cgebrd_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cgebrd( n, n, cdum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_cgebrd_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cgeqrf( m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgeqrf_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'P', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungbr_p_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'Q', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungbr_q_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'Q', m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungbr_q_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungqr( m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungqr_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungqr( m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungqr_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_prc_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_qln_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_qln_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_qln_nn = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr1 ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') maxwrk = n + lwork_cgeqrf_mn maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cgebrd_nn ) minwrk = 3_${ik}$*n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_cgeqrf_mn wrkbl = max( wrkbl, n + lwork_cungqr_mn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cunmbr_prc_nn ) maxwrk = m*n + n*n + wrkbl minwrk = 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_cgeqrf_mn wrkbl = max( wrkbl, n + lwork_cungqr_mn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cunmbr_prc_nn ) maxwrk = n*n + wrkbl minwrk = n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_cgeqrf_mn wrkbl = max( wrkbl, n + lwork_cungqr_mm ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_cunmbr_prc_nn ) maxwrk = n*n + wrkbl minwrk = n*n + max( 3_${ik}$*n, n + m ) end if else if( m>=mnthr2 ) then ! path 5 (m >> n, but not as much as mnthr1) maxwrk = 2_${ik}$*n + lwork_cgebrd_mn minwrk = 2_${ik}$*n + m if( wntqo ) then ! path 5o (m >> n, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr_q_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + n*n else if( wntqs ) then ! path 5s (m >> n, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr_q_mn ) else if( wntqa ) then ! path 5a (m >> n, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cungbr_q_mm ) end if else ! path 6 (m >= n, but not much larger) maxwrk = 2_${ik}$*n + lwork_cgebrd_mn minwrk = 2_${ik}$*n + m if( wntqo ) then ! path 6o (m >= n, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr_prc_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr_qln_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + n*n else if( wntqs ) then ! path 6s (m >= n, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr_qln_mn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr_prc_nn ) else if( wntqa ) then ! path 6a (m >= n, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_cunmbr_prc_nn ) end if end if else if( minmn>0_${ik}$ ) then ! there is no complex work space needed for bidiagonal svd ! the realwork space needed for bidiagonal svd (stdlib${ii}$_sbdsdc,KIND=sp) is ! bdspac = 3*m*m + 4*m for singular values and vectors; ! bdspac = 4*m for singular values only; ! not including e, ru, and rvt matrices. ! compute space preferred for each routine call stdlib${ii}$_cgebrd( m, n, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_cgebrd_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cgebrd( m, m, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_cgebrd_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cgelqf( m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_cgelqf_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'P', m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungbr_p_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'P', n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungbr_p_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cungbr( 'Q', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cungbr_q_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunglq( m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cunglq_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_cunglq_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, m, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_prc_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_prc_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_prc_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_cunmbr_qln_mm = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr1 ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') maxwrk = m + lwork_cgelqf_mn maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cgebrd_mm ) minwrk = 3_${ik}$*m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_cgelqf_mn wrkbl = max( wrkbl, m + lwork_cunglq_mn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cunmbr_prc_mm ) maxwrk = m*n + m*m + wrkbl minwrk = 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_cgelqf_mn wrkbl = max( wrkbl, m + lwork_cunglq_mn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cunmbr_prc_mm ) maxwrk = m*m + wrkbl minwrk = m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_cgelqf_mn wrkbl = max( wrkbl, m + lwork_cunglq_nn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_cunmbr_prc_mm ) maxwrk = m*m + wrkbl minwrk = m*m + max( 3_${ik}$*m, m + n ) end if else if( n>=mnthr2 ) then ! path 5t (n >> m, but not as much as mnthr1) maxwrk = 2_${ik}$*m + lwork_cgebrd_mn minwrk = 2_${ik}$*m + n if( wntqo ) then ! path 5to (n >> m, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr_p_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + m*m else if( wntqs ) then ! path 5ts (n >> m, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr_p_mn ) else if( wntqa ) then ! path 5ta (n >> m, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cungbr_p_nn ) end if else ! path 6t (n > m, but not much larger) maxwrk = 2_${ik}$*m + lwork_cgebrd_mn minwrk = 2_${ik}$*m + n if( wntqo ) then ! path 6to (n > m, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr_prc_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + m*m else if( wntqs ) then ! path 6ts (n > m, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr_prc_mn ) else if( wntqa ) then ! path 6ta (n > m, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_cunmbr_prc_nn ) end if end if end if maxwrk = max( maxwrk, minwrk ) end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = stdlib${ii}$_sroundup_lwork( maxwrk ) if( lwork<minwrk .and. .not. lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'P' ) smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_clange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_sisnan ( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr1 ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! cworkspace: need n [tau] + n [work] ! cworkspace: prefer n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) nrwork = ie + n ! perform bidiagonal svd, compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz='o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n ir = iu + ldwrku*n if( lwork >= m*n + n*n + 3_${ik}$*n ) then ! work(ir) is m by n ldwrkr = m else ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy r to work( ir ), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) ! generate q in a ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of r in work(iru) and computing right singular vectors ! of r in work(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = ie + n irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) ! overwrite work(iu) by the left singular vectors of r ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by the right singular vectors of r ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! cworkspace: need n*n [u] + n*n [r] ! cworkspace: prefer n*n [u] + m*n [r] ! rworkspace: need 0 do i = 1, m, ldwrkr chunk = min( m-i+1, ldwrkr ) call stdlib${ii}$_cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( iu ), & ldwrku, czero,work( ir ), ldwrkr ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! cworkspace: need n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) ! generate q in a ! cworkspace: need n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = ie + n irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of r ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of r ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! cworkspace: need n*n [r] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),ldwrkr, czero, & u, ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! cworkspace: need n*n [u] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! cworkspace: need n*n [u] + n [tau] + m [work] ! cworkspace: prefer n*n [u] + n [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ! produce r in a, zeroing out below it if (n>1_${ik}$) call stdlib${ii}$_claset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) iru = ie + n irvt = iru + n*n nrwork = irvt + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) ! overwrite work(iu) by left singular vectors of r ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of r ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! cworkspace: need n*n [u] ! rworkspace: need 0 call stdlib${ii}$_cgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),ldwrku, czero, & a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) end if else if( m>=mnthr2 ) then ! mnthr2 <= m < mnthr1 ! path 5 (m >> n, but not as much as mnthr1) ! reduce to bidiagonal form without qr decomposition, use ! stdlib${ii}$_cungbr and matrix multiplication to compute singular vectors ie = 1_${ik}$ nrwork = ie + n itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >> n, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1_${ik}$,dum,1_${ik}$,dum, idum, & rwork( nrwork ), iwork, info ) else if( wntqo ) then iu = nwork iru = nrwork irvt = iru + n*n nrwork = irvt + n*n ! path 5o (m >> n, jobz='o') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! generate q in a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*n ) then ! work( iu ) is m by n ldwrku = m else ! work(iu) is ldwrku by n ldwrku = ( lwork - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, ! storing the result in work(iu), copying to vt ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_clarcm( n, n, rwork( irvt ), n, vt, ldvt,work( iu ), ldwrku, & rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt ) ! multiply q in a by realmatrix rwork(iru,KIND=sp), storing the ! result in work(iu), copying to a ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_clacrm( chunk, n, a( i, 1_${ik}$ ), lda, rwork( iru ),n, work( iu ), & ldwrku, rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 5s (m >> n, jobz='s') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! copy a to u, generate q ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, n, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_clarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', n, n, a, lda, vt, ldvt ) ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt call stdlib${ii}$_clacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) else ! path 5a (m >> n, jobz='a') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! copy a to u, generate q ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'L', m, n, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_clarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', n, n, a, lda, vt, ldvt ) ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt call stdlib${ii}$_clacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr2 ! path 6 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ! use stdlib_cunmbr to compute singular vectors ie = 1_${ik}$ nrwork = ie + n itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 6n (m >= n, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then iu = nwork iru = nrwork irvt = iru + n*n nrwork = irvt + n*n if( lwork >= m*n + 3_${ik}$*n ) then ! work( iu ) is m by n ldwrku = m else ! work( iu ) is ldwrku by n ldwrku = ( lwork - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! path 6o (m >= n, jobz='o') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*n ) then ! path 6o-fast ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) ! overwrite work(iu) by left singular vectors of a, copying ! to a ! cworkspace: need 2*n [tauq, taup] + m*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] call stdlib${ii}$_claset( 'F', m, n, czero, czero, work( iu ),ldwrku ) call stdlib${ii}$_clacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) call stdlib${ii}$_clacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 6o-slow ! generate q in a ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork-nwork+1, ierr ) ! multiply q in a by realmatrix rwork(iru,KIND=sp), storing the ! result in work(iu), copying to a ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_clacrm( chunk, n, a( i, 1_${ik}$ ), lda,rwork( iru ), n, work( iu )& , ldwrku,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 6s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_claset( 'F', m, n, czero, czero, u, ldu ) call stdlib${ii}$_clacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) else ! path 6a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! set the right corner of u to identity matrix call stdlib${ii}$_claset( 'F', m, m, czero, czero, u, ldu ) if( m>n ) then call stdlib${ii}$_claset( 'F', m-n, m-n, czero, cone,u( n+1, n+1 ), ldu ) end if ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_clacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr1 ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! cworkspace: need m [tau] + m [work] ! cworkspace: prefer m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) nrwork = ie + m ! perform bidiagonal svd, compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_sbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ldwkvt = m ! work(ivt) is m by m il = ivt + ldwkvt*m if( lwork >= m*n + m*m + 3_${ik}$*m ) then ! work(il) m by n ldwrkl = m chunk = n else ! work(il) is m by chunk ldwrkl = m chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if itau = il + ldwrkl*chunk nwork = itau + m ! compute a=l*q ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) ! generate q in a ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_cgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix work(iu) ! overwrite work(iu) by the left singular vectors of l ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix work(ivt) ! overwrite work(ivt) by the right singular vectors of l ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) ! multiply right singular vectors of l in work(il) by q ! in a, storing result in work(il) and copying to a ! cworkspace: need m*m [vt] + m*m [l] ! cworkspace: prefer m*m [vt] + m*n [l] ! rworkspace: need 0 do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_cgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,a( 1_${ik}$, i ), & lda, czero, work( il ),ldwrkl ) call stdlib${ii}$_clacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! cworkspace: need m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_clacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) ! generate q in a ! cworkspace: need m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_cgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of l ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by left singular vectors of l ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! copy vt to work(il), multiply right singular vectors of l ! in work(il) by q in a, storing result in vt ! cworkspace: need m*m [l] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,a, lda, czero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! cworkspace: need m*m [vt] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! cworkspace: need m*m [vt] + m [tau] + n [work] ! cworkspace: prefer m*m [vt] + m [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cunglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork-& nwork+1, ierr ) ! produce l in a, zeroing out above it if (m>1_${ik}$) call stdlib${ii}$_claset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of l ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix work(ivt) ! overwrite work(ivt) by right singular vectors of l ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork-nwork+1, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! cworkspace: need m*m [vt] ! rworkspace: need 0 call stdlib${ii}$_cgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,vt, ldvt, & czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) end if else if( n>=mnthr2 ) then ! mnthr2 <= n < mnthr1 ! path 5t (n >> m, but not as much as mnthr1) ! reduce to bidiagonal form without qr decomposition, use ! stdlib${ii}$_cungbr and matrix multiplication to compute singular vectors ie = 1_${ik}$ nrwork = ie + m itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n >> m, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then irvt = nrwork iru = irvt + m*m nrwork = iru + m*m ivt = nwork ! path 5to (n >> m, jobz='o') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! generate p**h in a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), lwork-& nwork+1, ierr ) ldwkvt = m if( lwork >= m*n + 3_${ik}$*m ) then ! work( ivt ) is m by n nwork = ivt + ldwkvt*n chunk = n else ! work( ivt ) is m by chunk chunk = ( lwork - 3_${ik}$*m ) / m nwork = ivt + ldwkvt*chunk end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(irvt,KIND=sp) ! storing the result in work(ivt), copying to u ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_clacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),ldwkvt, rwork( & nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu ) ! multiply rwork(irvt) by p**h in a, storing the ! result in work(ivt), copying to a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_clarcm( m, blk, rwork( irvt ), m, a( 1_${ik}$, i ), lda,work( ivt ), & ldwkvt, rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 5ts (n >> m, jobz='s') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! copy a to vt, generate p**h ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', m, n, m, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_clacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, m, a, lda, u, ldu ) ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru call stdlib${ii}$_clarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) else ! path 5ta (n >> m, jobz='a') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_cungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! copy a to vt, generate p**h ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_clacpy( 'U', m, n, a, lda, vt, ldvt ) call stdlib${ii}$_cungbr( 'P', n, n, m, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(iru,KIND=sp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_clacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, m, a, lda, u, ldu ) ! multiply realmatrix rwork(irvt,KIND=sp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru call stdlib${ii}$_clarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr2 ! path 6t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ! use stdlib_cunmbr to compute singular vectors ie = 1_${ik}$ nrwork = ie + m itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 6tn (n > m, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_sbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 6to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m ) then ! work( ivt ) is m by n call stdlib${ii}$_claset( 'F', m, n, czero, czero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n else ! work( ivt ) is m by chunk chunk = ( lwork - 3_${ik}$*m ) / m nwork = ivt + ldwkvt*chunk end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*m ) then ! path 6to-fast ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix work(ivt) ! overwrite work(ivt) by right singular vectors of a, ! copying to a ! cworkspace: need 2*m [tauq, taup] + m*n [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) call stdlib${ii}$_clacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 6to-slow ! generate p**h in a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_cungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! multiply q in a by realmatrix rwork(iru,KIND=sp), storing the ! result in work(iu), copying to a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_clarcm( m, blk, rwork( irvt ), m, a( 1_${ik}$, i ),lda, work( ivt )& , ldwkvt,rwork( nrwork ) ) call stdlib${ii}$_clacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1_${ik}$, i ), lda ) end do end if else if( wntqs ) then ! path 6ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_claset( 'F', m, n, czero, czero, vt, ldvt ) call stdlib${ii}$_clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) else ! path 6ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=sp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_clacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! set all of vt to identity matrix call stdlib${ii}$_claset( 'F', n, n, czero, cone, vt, ldvt ) ! copy realmatrix rwork(irvt,KIND=sp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_cunmbr( 'P', 'R', 'C', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_sroundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_cgesdd module subroutine stdlib${ii}$_zgesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, iwork, & !! ZGESDD computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors, by using divide-and-conquer method. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**H, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(out) :: rwork(*), s(*) complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: blk, chunk, i, ie, ierr, il, ir, iru, irvt, iscl, itau, itaup, itauq, & iu, ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr1, mnthr2, nrwork,& nwork, wrkbl integer(${ik}$) :: lwork_zgebrd_mn, lwork_zgebrd_mm, lwork_zgebrd_nn, lwork_zgelqf_mn, & lwork_zgeqrf_mn, lwork_zungbr_p_mn, lwork_zungbr_p_nn, lwork_zungbr_q_mn, & lwork_zungbr_q_mm, lwork_zunglq_mn, lwork_zunglq_nn, lwork_zungqr_mm, lwork_zungqr_mn, & lwork_zunmbr_prc_mm, lwork_zunmbr_qln_mm, lwork_zunmbr_prc_mn, lwork_zunmbr_qln_mn, & lwork_zunmbr_prc_nn, lwork_zunmbr_qln_nn real(dp) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(dp) :: dum(1_${ik}$) complex(dp) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) mnthr1 = int( minmn*17.0_dp / 9.0_dp,KIND=${ik}$) mnthr2 = int( minmn*5.0_dp / 3.0_dp,KIND=${ik}$) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! there is no complex work space needed for bidiagonal svd ! the realwork space needed for bidiagonal svd (stdlib${ii}$_dbdsdc,KIND=dp) is ! bdspac = 3*n*n + 4*n for singular values and vectors; ! bdspac = 4*n for singular values only; ! not including e, ru, and rvt matrices. ! compute space preferred for each routine call stdlib${ii}$_zgebrd( m, n, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_zgebrd_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zgebrd( n, n, cdum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_zgebrd_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zgeqrf( m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgeqrf_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'P', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungbr_p_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'Q', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungbr_q_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'Q', m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungbr_q_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungqr( m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungqr_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungqr( m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungqr_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_prc_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_qln_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_qln_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_qln_nn = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr1 ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') maxwrk = n + lwork_zgeqrf_mn maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zgebrd_nn ) minwrk = 3_${ik}$*n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_zgeqrf_mn wrkbl = max( wrkbl, n + lwork_zungqr_mn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zunmbr_prc_nn ) maxwrk = m*n + n*n + wrkbl minwrk = 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_zgeqrf_mn wrkbl = max( wrkbl, n + lwork_zungqr_mn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zunmbr_prc_nn ) maxwrk = n*n + wrkbl minwrk = n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_zgeqrf_mn wrkbl = max( wrkbl, n + lwork_zungqr_mm ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_zunmbr_prc_nn ) maxwrk = n*n + wrkbl minwrk = n*n + max( 3_${ik}$*n, n + m ) end if else if( m>=mnthr2 ) then ! path 5 (m >> n, but not as much as mnthr1) maxwrk = 2_${ik}$*n + lwork_zgebrd_mn minwrk = 2_${ik}$*n + m if( wntqo ) then ! path 5o (m >> n, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr_q_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + n*n else if( wntqs ) then ! path 5s (m >> n, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr_q_mn ) else if( wntqa ) then ! path 5a (m >> n, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zungbr_q_mm ) end if else ! path 6 (m >= n, but not much larger) maxwrk = 2_${ik}$*n + lwork_zgebrd_mn minwrk = 2_${ik}$*n + m if( wntqo ) then ! path 6o (m >= n, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr_prc_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr_qln_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + n*n else if( wntqs ) then ! path 6s (m >= n, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr_qln_mn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr_prc_nn ) else if( wntqa ) then ! path 6a (m >= n, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_zunmbr_prc_nn ) end if end if else if( minmn>0_${ik}$ ) then ! there is no complex work space needed for bidiagonal svd ! the realwork space needed for bidiagonal svd (stdlib${ii}$_dbdsdc,KIND=dp) is ! bdspac = 3*m*m + 4*m for singular values and vectors; ! bdspac = 4*m for singular values only; ! not including e, ru, and rvt matrices. ! compute space preferred for each routine call stdlib${ii}$_zgebrd( m, n, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_zgebrd_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zgebrd( m, m, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_zgebrd_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zgelqf( m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_zgelqf_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'P', m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungbr_p_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'P', n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungbr_p_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zungbr( 'Q', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zungbr_q_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunglq( m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zunglq_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_zunglq_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, m, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_prc_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_prc_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_prc_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_zunmbr_qln_mm = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr1 ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') maxwrk = m + lwork_zgelqf_mn maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zgebrd_mm ) minwrk = 3_${ik}$*m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_zgelqf_mn wrkbl = max( wrkbl, m + lwork_zunglq_mn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zunmbr_prc_mm ) maxwrk = m*n + m*m + wrkbl minwrk = 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_zgelqf_mn wrkbl = max( wrkbl, m + lwork_zunglq_mn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zunmbr_prc_mm ) maxwrk = m*m + wrkbl minwrk = m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_zgelqf_mn wrkbl = max( wrkbl, m + lwork_zunglq_nn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_zunmbr_prc_mm ) maxwrk = m*m + wrkbl minwrk = m*m + max( 3_${ik}$*m, m + n ) end if else if( n>=mnthr2 ) then ! path 5t (n >> m, but not as much as mnthr1) maxwrk = 2_${ik}$*m + lwork_zgebrd_mn minwrk = 2_${ik}$*m + n if( wntqo ) then ! path 5to (n >> m, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr_p_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + m*m else if( wntqs ) then ! path 5ts (n >> m, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr_p_mn ) else if( wntqa ) then ! path 5ta (n >> m, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zungbr_p_nn ) end if else ! path 6t (n > m, but not much larger) maxwrk = 2_${ik}$*m + lwork_zgebrd_mn minwrk = 2_${ik}$*m + n if( wntqo ) then ! path 6to (n > m, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr_prc_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + m*m else if( wntqs ) then ! path 6ts (n > m, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr_prc_mn ) else if( wntqa ) then ! path 6ta (n > m, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_zunmbr_prc_nn ) end if end if end if maxwrk = max( maxwrk, minwrk ) end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = stdlib${ii}$_droundup_lwork( maxwrk ) if( lwork<minwrk .and. .not. lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'P' ) smlnum = sqrt( stdlib${ii}$_dlamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_zlange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_disnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr1 ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! cworkspace: need n [tau] + n [work] ! cworkspace: prefer n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) nrwork = ie + n ! perform bidiagonal svd, compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz='o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n ir = iu + ldwrku*n if( lwork >= m*n + n*n + 3_${ik}$*n ) then ! work(ir) is m by n ldwrkr = m else ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy r to work( ir ), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) ! generate q in a ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of r in work(iru) and computing right singular vectors ! of r in work(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = ie + n irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) ! overwrite work(iu) by the left singular vectors of r ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by the right singular vectors of r ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! cworkspace: need n*n [u] + n*n [r] ! cworkspace: prefer n*n [u] + m*n [r] ! rworkspace: need 0 do i = 1, m, ldwrkr chunk = min( m-i+1, ldwrkr ) call stdlib${ii}$_zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( iu ), & ldwrku, czero,work( ir ), ldwrkr ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! cworkspace: need n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) ! generate q in a ! cworkspace: need n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = ie + n irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of r ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of r ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! cworkspace: need n*n [r] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),ldwrkr, czero, & u, ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! cworkspace: need n*n [u] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgeqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! cworkspace: need n*n [u] + n [tau] + m [work] ! cworkspace: prefer n*n [u] + n [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ! produce r in a, zeroing out below it if (n>1_${ik}$) call stdlib${ii}$_zlaset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) iru = ie + n irvt = iru + n*n nrwork = irvt + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) ! overwrite work(iu) by left singular vectors of r ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of r ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! cworkspace: need n*n [u] ! rworkspace: need 0 call stdlib${ii}$_zgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),ldwrku, czero, & a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) end if else if( m>=mnthr2 ) then ! mnthr2 <= m < mnthr1 ! path 5 (m >> n, but not as much as mnthr1) ! reduce to bidiagonal form without qr decomposition, use ! stdlib${ii}$_zungbr and matrix multiplication to compute singular vectors ie = 1_${ik}$ nrwork = ie + n itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >> n, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1_${ik}$,dum,1_${ik}$,dum, idum, & rwork( nrwork ), iwork, info ) else if( wntqo ) then iu = nwork iru = nrwork irvt = iru + n*n nrwork = irvt + n*n ! path 5o (m >> n, jobz='o') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! generate q in a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*n ) then ! work( iu ) is m by n ldwrku = m else ! work(iu) is ldwrku by n ldwrku = ( lwork - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, ! storing the result in work(iu), copying to vt ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_zlarcm( n, n, rwork( irvt ), n, vt, ldvt,work( iu ), ldwrku, & rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt ) ! multiply q in a by realmatrix rwork(iru,KIND=dp), storing the ! result in work(iu), copying to a ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_zlacrm( chunk, n, a( i, 1_${ik}$ ), lda, rwork( iru ),n, work( iu ), & ldwrku, rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 5s (m >> n, jobz='s') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! copy a to u, generate q ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, n, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', n, n, a, lda, vt, ldvt ) ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt call stdlib${ii}$_zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) else ! path 5a (m >> n, jobz='a') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! copy a to u, generate q ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'L', m, n, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', n, n, a, lda, vt, ldvt ) ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt call stdlib${ii}$_zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr2 ! path 6 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ! use stdlib_zunmbr to compute singular vectors ie = 1_${ik}$ nrwork = ie + n itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 6n (m >= n, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then iu = nwork iru = nrwork irvt = iru + n*n nrwork = irvt + n*n if( lwork >= m*n + 3_${ik}$*n ) then ! work( iu ) is m by n ldwrku = m else ! work( iu ) is ldwrku by n ldwrku = ( lwork - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! path 6o (m >= n, jobz='o') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*n ) then ! path 6o-fast ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) ! overwrite work(iu) by left singular vectors of a, copying ! to a ! cworkspace: need 2*n [tauq, taup] + m*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, work( iu ),ldwrku ) call stdlib${ii}$_zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) call stdlib${ii}$_zlacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 6o-slow ! generate q in a ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork-nwork+1, ierr ) ! multiply q in a by realmatrix rwork(iru,KIND=dp), storing the ! result in work(iu), copying to a ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_zlacrm( chunk, n, a( i, 1_${ik}$ ), lda,rwork( iru ), n, work( iu )& , ldwrku,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 6s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, u, ldu ) call stdlib${ii}$_zlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) else ! path 6a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! set the right corner of u to identity matrix call stdlib${ii}$_zlaset( 'F', m, m, czero, czero, u, ldu ) if( m>n ) then call stdlib${ii}$_zlaset( 'F', m-n, m-n, czero, cone,u( n+1, n+1 ), ldu ) end if ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_zlacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr1 ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! cworkspace: need m [tau] + m [work] ! cworkspace: prefer m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) nrwork = ie + m ! perform bidiagonal svd, compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_dbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ldwkvt = m ! work(ivt) is m by m il = ivt + ldwkvt*m if( lwork >= m*n + m*m + 3_${ik}$*m ) then ! work(il) m by n ldwrkl = m chunk = n else ! work(il) is m by chunk ldwrkl = m chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if itau = il + ldwrkl*chunk nwork = itau + m ! compute a=l*q ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) ! generate q in a ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix work(iu) ! overwrite work(iu) by the left singular vectors of l ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix work(ivt) ! overwrite work(ivt) by the right singular vectors of l ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) ! multiply right singular vectors of l in work(il) by q ! in a, storing result in work(il) and copying to a ! cworkspace: need m*m [vt] + m*m [l] ! cworkspace: prefer m*m [vt] + m*n [l] ! rworkspace: need 0 do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,a( 1_${ik}$, i ), & lda, czero, work( il ),ldwrkl ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! cworkspace: need m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) ! generate q in a ! cworkspace: need m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zunglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of l ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by left singular vectors of l ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! copy vt to work(il), multiply right singular vectors of l ! in work(il) by q in a, storing result in vt ! cworkspace: need m*m [l] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,a, lda, czero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! cworkspace: need m*m [vt] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zgelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! cworkspace: need m*m [vt] + m [tau] + n [work] ! cworkspace: prefer m*m [vt] + m [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zunglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork-& nwork+1, ierr ) ! produce l in a, zeroing out above it if (m>1_${ik}$) call stdlib${ii}$_zlaset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of l ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix work(ivt) ! overwrite work(ivt) by right singular vectors of l ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork-nwork+1, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! cworkspace: need m*m [vt] ! rworkspace: need 0 call stdlib${ii}$_zgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,vt, ldvt, & czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) end if else if( n>=mnthr2 ) then ! mnthr2 <= n < mnthr1 ! path 5t (n >> m, but not as much as mnthr1) ! reduce to bidiagonal form without qr decomposition, use ! stdlib${ii}$_zungbr and matrix multiplication to compute singular vectors ie = 1_${ik}$ nrwork = ie + m itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n >> m, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then irvt = nrwork iru = irvt + m*m nrwork = iru + m*m ivt = nwork ! path 5to (n >> m, jobz='o') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! generate p**h in a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), lwork-& nwork+1, ierr ) ldwkvt = m if( lwork >= m*n + 3_${ik}$*m ) then ! work( ivt ) is m by n nwork = ivt + ldwkvt*n chunk = n else ! work( ivt ) is m by chunk chunk = ( lwork - 3_${ik}$*m ) / m nwork = ivt + ldwkvt*chunk end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(irvt,KIND=dp) ! storing the result in work(ivt), copying to u ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_zlacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),ldwkvt, rwork( & nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu ) ! multiply rwork(irvt) by p**h in a, storing the ! result in work(ivt), copying to a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zlarcm( m, blk, rwork( irvt ), m, a( 1_${ik}$, i ), lda,work( ivt ), & ldwkvt, rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 5ts (n >> m, jobz='s') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! copy a to vt, generate p**h ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', m, n, m, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, m, a, lda, u, ldu ) ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru call stdlib${ii}$_zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) else ! path 5ta (n >> m, jobz='a') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_zungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! copy a to vt, generate p**h ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zlacpy( 'U', m, n, a, lda, vt, ldvt ) call stdlib${ii}$_zungbr( 'P', n, n, m, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(iru,KIND=dp), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, m, a, lda, u, ldu ) ! multiply realmatrix rwork(irvt,KIND=dp) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru call stdlib${ii}$_zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr2 ! path 6t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ! use stdlib_zunmbr to compute singular vectors ie = 1_${ik}$ nrwork = ie + m itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 6tn (n > m, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_dbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 6to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m ) then ! work( ivt ) is m by n call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n else ! work( ivt ) is m by chunk chunk = ( lwork - 3_${ik}$*m ) / m nwork = ivt + ldwkvt*chunk end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*m ) then ! path 6to-fast ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix work(ivt) ! overwrite work(ivt) by right singular vectors of a, ! copying to a ! cworkspace: need 2*m [tauq, taup] + m*n [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) call stdlib${ii}$_zlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 6to-slow ! generate p**h in a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_zungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! multiply q in a by realmatrix rwork(iru,KIND=dp), storing the ! result in work(iu), copying to a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_zlarcm( m, blk, rwork( irvt ), m, a( 1_${ik}$, i ),lda, work( ivt )& , ldwkvt,rwork( nrwork ) ) call stdlib${ii}$_zlacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1_${ik}$, i ), lda ) end do end if else if( wntqs ) then ! path 6ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, vt, ldvt ) call stdlib${ii}$_zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) else ! path 6ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=dp) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_zlacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! set all of vt to identity matrix call stdlib${ii}$_zlaset( 'F', n, n, czero, cone, vt, ldvt ) ! copy realmatrix rwork(irvt,KIND=dp) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_zunmbr( 'P', 'R', 'C', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_droundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_zgesdd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$gesdd( jobz, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, rwork, iwork, & !! ZGESDD: computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, optionally computing the left and/or right singular !! vectors, by using divide-and-conquer method. The SVD is written !! A = U * SIGMA * conjugate-transpose(V) !! where SIGMA is an M-by-N matrix which is zero except for its !! min(m,n) diagonal elements, U is an M-by-M unitary matrix, and !! V is an N-by-N unitary matrix. The diagonal elements of SIGMA !! are the singular values of A; they are real and non-negative, and !! are returned in descending order. The first min(m,n) columns of !! U and V are the left and right singular vectors of A. !! Note that the routine returns VT = V**H, not V. !! The divide and conquer algorithm makes very mild assumptions about !! floating point arithmetic. It will work on machines with a guard !! digit in add/subtract, or on those binary machines without guard !! digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or !! Cray-2. It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. info ) ! -- lapack driver routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobz integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${ck}$), intent(out) :: rwork(*), s(*) complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wntqa, wntqas, wntqn, wntqo, wntqs integer(${ik}$) :: blk, chunk, i, ie, ierr, il, ir, iru, irvt, iscl, itau, itaup, itauq, & iu, ivt, ldwkvt, ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr1, mnthr2, nrwork,& nwork, wrkbl integer(${ik}$) :: lwork_wgebrd_mn, lwork_wgebrd_mm, lwork_wgebrd_nn, lwork_wgelqf_mn, & lwork_wgeqrf_mn, lwork_wungbr_p_mn, lwork_wungbr_p_nn, lwork_wungbr_q_mn, & lwork_wungbr_q_mm, lwork_wunglq_mn, lwork_wunglq_nn, lwork_wungqr_mm, lwork_wungqr_mn, & lwork_wunmbr_prc_mm, lwork_wunmbr_qln_mm, lwork_wunmbr_prc_mn, lwork_wunmbr_qln_mn, & lwork_wunmbr_prc_nn, lwork_wunmbr_qln_nn real(${ck}$) :: anrm, bignum, eps, smlnum ! Local Arrays integer(${ik}$) :: idum(1_${ik}$) real(${ck}$) :: dum(1_${ik}$) complex(${ck}$) :: cdum(1_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ minmn = min( m, n ) mnthr1 = int( minmn*17.0_${ck}$ / 9.0_${ck}$,KIND=${ik}$) mnthr2 = int( minmn*5.0_${ck}$ / 3.0_${ck}$,KIND=${ik}$) wntqa = stdlib_lsame( jobz, 'A' ) wntqs = stdlib_lsame( jobz, 'S' ) wntqas = wntqa .or. wntqs wntqo = stdlib_lsame( jobz, 'O' ) wntqn = stdlib_lsame( jobz, 'N' ) lquery = ( lwork==-1_${ik}$ ) minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( .not.( wntqa .or. wntqs .or. wntqo .or. wntqn ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -5_${ik}$ else if( ldu<1_${ik}$ .or. ( wntqas .and. ldu<m ) .or.( wntqo .and. m<n .and. ldu<m ) ) & then info = -8_${ik}$ else if( ldvt<1_${ik}$ .or. ( wntqa .and. ldvt<n ) .or.( wntqs .and. ldvt<minmn ) .or.( wntqo & .and. m>=n .and. ldvt<n ) ) then info = -10_${ik}$ end if ! compute workspace ! note: comments in the code beginning "workspace:" describe the ! minimal amount of workspace allocated at that point in the code, ! as well as the preferred amount for good performance. ! cworkspace refers to complex workspace, and rworkspace to ! real workspace. nb refers to the optimal block size for the ! immediately following subroutine, as returned by stdlib${ii}$_ilaenv.) if( info==0_${ik}$ ) then minwrk = 1_${ik}$ maxwrk = 1_${ik}$ if( m>=n .and. minmn>0_${ik}$ ) then ! there is no complex work space needed for bidiagonal svd ! the realwork space needed for bidiagonal svd (stdlib${ii}$_${c2ri(ci)}$bdsdc,KIND=${ck}$) is ! bdspac = 3*n*n + 4*n for singular values and vectors; ! bdspac = 4*n for singular values only; ! not including e, ru, and rvt matrices. ! compute space preferred for each routine call stdlib${ii}$_${ci}$gebrd( m, n, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_wgebrd_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$gebrd( n, n, cdum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_wgebrd_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$geqrf( m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgeqrf_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungbr_p_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungbr_q_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungbr_q_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungqr( m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungqr_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungqr( m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungqr_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_prc_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_qln_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, n, n, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_qln_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', n, n, n, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_qln_nn = int( cdum(1_${ik}$),KIND=${ik}$) if( m>=mnthr1 ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') maxwrk = n + lwork_wgeqrf_mn maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wgebrd_nn ) minwrk = 3_${ik}$*n else if( wntqo ) then ! path 2 (m >> n, jobz='o') wrkbl = n + lwork_wgeqrf_mn wrkbl = max( wrkbl, n + lwork_wungqr_mn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wunmbr_prc_nn ) maxwrk = m*n + n*n + wrkbl minwrk = 2_${ik}$*n*n + 3_${ik}$*n else if( wntqs ) then ! path 3 (m >> n, jobz='s') wrkbl = n + lwork_wgeqrf_mn wrkbl = max( wrkbl, n + lwork_wungqr_mn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wunmbr_prc_nn ) maxwrk = n*n + wrkbl minwrk = n*n + 3_${ik}$*n else if( wntqa ) then ! path 4 (m >> n, jobz='a') wrkbl = n + lwork_wgeqrf_mn wrkbl = max( wrkbl, n + lwork_wungqr_mm ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wgebrd_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wunmbr_qln_nn ) wrkbl = max( wrkbl, 2_${ik}$*n + lwork_wunmbr_prc_nn ) maxwrk = n*n + wrkbl minwrk = n*n + max( 3_${ik}$*n, n + m ) end if else if( m>=mnthr2 ) then ! path 5 (m >> n, but not as much as mnthr1) maxwrk = 2_${ik}$*n + lwork_wgebrd_mn minwrk = 2_${ik}$*n + m if( wntqo ) then ! path 5o (m >> n, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr_q_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + n*n else if( wntqs ) then ! path 5s (m >> n, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr_q_mn ) else if( wntqa ) then ! path 5a (m >> n, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr_p_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wungbr_q_mm ) end if else ! path 6 (m >= n, but not much larger) maxwrk = 2_${ik}$*n + lwork_wgebrd_mn minwrk = 2_${ik}$*n + m if( wntqo ) then ! path 6o (m >= n, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr_prc_nn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr_qln_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + n*n else if( wntqs ) then ! path 6s (m >= n, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr_qln_mn ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr_prc_nn ) else if( wntqa ) then ! path 6a (m >= n, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*n + lwork_wunmbr_prc_nn ) end if end if else if( minmn>0_${ik}$ ) then ! there is no complex work space needed for bidiagonal svd ! the realwork space needed for bidiagonal svd (stdlib${ii}$_${c2ri(ci)}$bdsdc,KIND=${ck}$) is ! bdspac = 3*m*m + 4*m for singular values and vectors; ! bdspac = 4*m for singular values only; ! not including e, ru, and rvt matrices. ! compute space preferred for each routine call stdlib${ii}$_${ci}$gebrd( m, n, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_wgebrd_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$gebrd( m, m, cdum(1_${ik}$), m, dum(1_${ik}$), dum(1_${ik}$), cdum(1_${ik}$),cdum(1_${ik}$), cdum(1_${ik}$), -& 1_${ik}$, ierr ) lwork_wgebrd_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$gelqf( m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$), -1_${ik}$, ierr ) lwork_wgelqf_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungbr_p_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungbr_p_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wungbr_q_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unglq( m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wunglq_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unglq( n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$), cdum(1_${ik}$),-1_${ik}$, ierr ) lwork_wunglq_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, m, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_prc_mm = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, n, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_prc_mn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, m, cdum(1_${ik}$), n, cdum(1_${ik}$),cdum(1_${ik}$), n, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_prc_nn = int( cdum(1_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, m, cdum(1_${ik}$), m, cdum(1_${ik}$),cdum(1_${ik}$), m, cdum(& 1_${ik}$), -1_${ik}$, ierr ) lwork_wunmbr_qln_mm = int( cdum(1_${ik}$),KIND=${ik}$) if( n>=mnthr1 ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') maxwrk = m + lwork_wgelqf_mn maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wgebrd_mm ) minwrk = 3_${ik}$*m else if( wntqo ) then ! path 2t (n >> m, jobz='o') wrkbl = m + lwork_wgelqf_mn wrkbl = max( wrkbl, m + lwork_wunglq_mn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wunmbr_prc_mm ) maxwrk = m*n + m*m + wrkbl minwrk = 2_${ik}$*m*m + 3_${ik}$*m else if( wntqs ) then ! path 3t (n >> m, jobz='s') wrkbl = m + lwork_wgelqf_mn wrkbl = max( wrkbl, m + lwork_wunglq_mn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wunmbr_prc_mm ) maxwrk = m*m + wrkbl minwrk = m*m + 3_${ik}$*m else if( wntqa ) then ! path 4t (n >> m, jobz='a') wrkbl = m + lwork_wgelqf_mn wrkbl = max( wrkbl, m + lwork_wunglq_nn ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wgebrd_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wunmbr_qln_mm ) wrkbl = max( wrkbl, 2_${ik}$*m + lwork_wunmbr_prc_mm ) maxwrk = m*m + wrkbl minwrk = m*m + max( 3_${ik}$*m, m + n ) end if else if( n>=mnthr2 ) then ! path 5t (n >> m, but not as much as mnthr1) maxwrk = 2_${ik}$*m + lwork_wgebrd_mn minwrk = 2_${ik}$*m + n if( wntqo ) then ! path 5to (n >> m, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr_p_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + m*m else if( wntqs ) then ! path 5ts (n >> m, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr_p_mn ) else if( wntqa ) then ! path 5ta (n >> m, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr_q_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wungbr_p_nn ) end if else ! path 6t (n > m, but not much larger) maxwrk = 2_${ik}$*m + lwork_wgebrd_mn minwrk = 2_${ik}$*m + n if( wntqo ) then ! path 6to (n > m, jobz='o') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr_prc_mn ) maxwrk = maxwrk + m*n minwrk = minwrk + m*m else if( wntqs ) then ! path 6ts (n > m, jobz='s') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr_prc_mn ) else if( wntqa ) then ! path 6ta (n > m, jobz='a') maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr_qln_mm ) maxwrk = max( maxwrk, 2_${ik}$*m + lwork_wunmbr_prc_nn ) end if end if end if maxwrk = max( maxwrk, minwrk ) end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = stdlib${ii}$_${c2ri(ci)}$roundup_lwork( maxwrk ) if( lwork<minwrk .and. .not. lquery ) then info = -12_${ik}$ end if end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESDD', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then return end if ! get machine constants eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' ) smlnum = sqrt( stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) ) / eps bignum = one / smlnum ! scale a if max element outside range [smlnum,bignum] anrm = stdlib${ii}$_${ci}$lange( 'M', m, n, a, lda, dum ) if( stdlib${ii}$_${c2ri(ci)}$isnan( anrm ) ) then info = -4_${ik}$ return end if iscl = 0_${ik}$ if( anrm>zero .and. anrm<smlnum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr ) else if( anrm>bignum ) then iscl = 1_${ik}$ call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr ) end if if( m>=n ) then ! a has at least as many rows as columns. if a has sufficiently ! more rows than columns, first reduce using the qr ! decomposition (if sufficient workspace available) if( m>=mnthr1 ) then if( wntqn ) then ! path 1 (m >> n, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + n ! compute a=q*r ! cworkspace: need n [tau] + n [work] ! cworkspace: prefer n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! zero out below r if (n>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) nrwork = ie + n ! perform bidiagonal svd, compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'N', n, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 2 (m >> n, jobz='o') ! n left singular vectors to be overwritten on a and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n ir = iu + ldwrku*n if( lwork >= m*n + n*n + 3_${ik}$*n ) then ! work(ir) is m by n ldwrkr = m else ldwrkr = ( lwork - n*n - 3_${ik}$*n ) / n end if itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy r to work( ir ), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) ! generate q in a ! cworkspace: need n*n [u] + n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of r in work(iru) and computing right singular vectors ! of r in work(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = ie + n irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix work(iu) ! overwrite work(iu) by the left singular vectors of r ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & work( iu ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by the right singular vectors of r ! cworkspace: need n*n [u] + n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! multiply q in a by left singular vectors of r in ! work(iu), storing result in work(ir) and copying to a ! cworkspace: need n*n [u] + n*n [r] ! cworkspace: prefer n*n [u] + m*n [r] ! rworkspace: need 0 do i = 1, m, ldwrkr chunk = min( m-i+1, ldwrkr ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', chunk, n, n, cone, a( i, 1_${ik}$ ),lda, work( iu ), & ldwrku, czero,work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( ir ), ldwrkr,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 3 (m >> n, jobz='s') ! n left singular vectors to be computed in u and ! n right singular vectors to be computed in vt ir = 1_${ik}$ ! work(ir) is n by n ldwrkr = n itau = ir + ldwrkr*n nwork = itau + n ! compute a=q*r ! cworkspace: need n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy r to work(ir), zeroing out below it call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, work( ir+1 ),ldwrkr ) ! generate q in a ! cworkspace: need n*n [r] + n [tau] + n [work] ! cworkspace: prefer n*n [r] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungqr( m, n, n, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in work(ir) ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_${ci}$gebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = ie + n irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of r ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of r ! cworkspace: need n*n [r] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [r] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! multiply q in a by left singular vectors of r in ! work(ir), storing result in u ! cworkspace: need n*n [r] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),ldwrkr, czero, & u, ldu ) else if( wntqa ) then ! path 4 (m >> n, jobz='a') ! m left singular vectors to be computed in u and ! n right singular vectors to be computed in vt iu = 1_${ik}$ ! work(iu) is n by n ldwrku = n itau = iu + ldwrku*n nwork = itau + n ! compute a=q*r, copying result to u ! cworkspace: need n*n [u] + n [tau] + n [work] ! cworkspace: prefer n*n [u] + n [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$geqrf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) ! generate q in u ! cworkspace: need n*n [u] + n [tau] + m [work] ! cworkspace: prefer n*n [u] + n [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungqr( m, m, n, u, ldu, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ! produce r in a, zeroing out below it if (n>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'L', n-1, n-1, czero, czero, a( 2_${ik}$, 1_${ik}$ ),lda ) ie = 1_${ik}$ itauq = itau itaup = itauq + n nwork = itaup + n ! bidiagonalize r in a ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + 2*n*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_${ci}$gebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) iru = ie + n irvt = iru + n*n nrwork = irvt + n*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix work(iu) ! overwrite work(iu) by left singular vectors of r ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', n, n, n, a, lda,work( itauq ), work( iu ), & ldwrku,work( nwork ), lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of r ! cworkspace: need n*n [u] + 2*n [tauq, taup] + n [work] ! cworkspace: prefer n*n [u] + 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) ! multiply q in u by left singular vectors of r in ! work(iu), storing result in a ! cworkspace: need n*n [u] ! rworkspace: need 0 call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),ldwrku, czero, & a, lda ) ! copy left singular vectors of a from a to u call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) end if else if( m>=mnthr2 ) then ! mnthr2 <= m < mnthr1 ! path 5 (m >> n, but not as much as mnthr1) ! reduce to bidiagonal form without qr decomposition, use ! stdlib${ii}$_${ci}$ungbr and matrix multiplication to compute singular vectors ie = 1_${ik}$ nrwork = ie + n itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5n (m >> n, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1_${ik}$,dum,1_${ik}$,dum, idum, & rwork( nrwork ), iwork, info ) else if( wntqo ) then iu = nwork iru = nrwork irvt = iru + n*n nrwork = irvt + n*n ! path 5o (m >> n, jobz='o') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! generate q in a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*n ) then ! work( iu ) is m by n ldwrku = m else ! work(iu) is ldwrku by n ldwrku = ( lwork - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=${ck}$) by p**h in vt, ! storing the result in work(iu), copying to vt ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_${ci}$larcm( n, n, rwork( irvt ), n, vt, ldvt,work( iu ), ldwrku, & rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt ) ! multiply q in a by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in work(iu), copying to a ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ci}$lacrm( chunk, n, a( i, 1_${ik}$ ), lda, rwork( iru ),n, work( iu ), & ldwrku, rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do else if( wntqs ) then ! path 5s (m >> n, jobz='s') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! copy a to u, generate q ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=${ck}$) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_${ci}$larcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', n, n, a, lda, vt, ldvt ) ! multiply q in u by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt call stdlib${ii}$_${ci}$lacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) else ! path 5a (m >> n, jobz='a') ! copy a to vt, generate p**h ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! copy a to u, generate q ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'L', m, n, a, lda, u, ldu ) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! multiply realmatrix rwork(irvt,KIND=${ck}$) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + 2*n*n [rwork] call stdlib${ii}$_${ci}$larcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', n, n, a, lda, vt, ldvt ) ! multiply q in u by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt call stdlib${ii}$_${ci}$lacrm( m, n, u, ldu, rwork( iru ), n, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, u, ldu ) end if else ! m < mnthr2 ! path 6 (m >= n, but not much larger) ! reduce to bidiagonal form without qr decomposition ! use stdlib_${ci}$unmbr to compute singular vectors ie = 1_${ik}$ nrwork = ie + n itauq = 1_${ik}$ itaup = itauq + n nwork = itaup + n ! bidiagonalize a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + (m+n)*nb [work] ! rworkspace: need n [e] call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 6n (m >= n, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need n [e] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'N', n, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then iu = nwork iru = nrwork irvt = iru + n*n nrwork = irvt + n*n if( lwork >= m*n + 3_${ik}$*n ) then ! work( iu ) is m by n ldwrku = m else ! work( iu ) is ldwrku by n ldwrku = ( lwork - 3_${ik}$*n ) / n end if nwork = iu + ldwrku*n ! path 6o (m >= n, jobz='o') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*n ) then ! path 6o-fast ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix work(iu) ! overwrite work(iu) by left singular vectors of a, copying ! to a ! cworkspace: need 2*n [tauq, taup] + m*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( iru ), n, work( iu ),ldwrku ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), work( iu & ), ldwrku,work( nwork ), lwork-nwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'F', m, n, work( iu ), ldwrku, a, lda ) else ! path 6o-slow ! generate q in a ! cworkspace: need 2*n [tauq, taup] + n*n [u] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*n [u] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungbr( 'Q', m, n, n, a, lda, work( itauq ),work( nwork ), & lwork-nwork+1, ierr ) ! multiply q in a by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in work(iu), copying to a ! cworkspace: need 2*n [tauq, taup] + n*n [u] ! cworkspace: prefer 2*n [tauq, taup] + m*n [u] ! rworkspace: need n [e] + n*n [ru] + 2*n*n [rwork] ! rworkspace: prefer n [e] + n*n [ru] + 2*m*n [rwork] < n + 5*n*n since m < 2*n here nrwork = irvt do i = 1, m, ldwrku chunk = min( m-i+1, ldwrku ) call stdlib${ii}$_${ci}$lacrm( chunk, n, a( i, 1_${ik}$ ), lda,rwork( iru ), n, work( iu )& , ldwrku,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda ) end do end if else if( wntqs ) then ! path 6s (m >= n, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, u, ldu ) call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, n, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) else ! path 6a (m >= n, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] + bdspac iru = nrwork irvt = iru + n*n nrwork = irvt + n*n call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),n, rwork( irvt )& , n, dum, idum,rwork( nrwork ), iwork, info ) ! set the right corner of u to identity matrix call stdlib${ii}$_${ci}$laset( 'F', m, m, czero, czero, u, ldu ) if( m>n ) then call stdlib${ii}$_${ci}$laset( 'F', m-n, m-n, czero, cone,u( n+1, n+1 ), ldu ) end if ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*n [tauq, taup] + m [work] ! cworkspace: prefer 2*n [tauq, taup] + m*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( iru ), n, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*n [tauq, taup] + n [work] ! cworkspace: prefer 2*n [tauq, taup] + n*nb [work] ! rworkspace: need n [e] + n*n [ru] + n*n [rvt] call stdlib${ii}$_${ci}$lacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, n, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) end if end if else ! a has more columns than rows. if a has sufficiently more ! columns than rows, first reduce using the lq decomposition (if ! sufficient workspace available) if( n>=mnthr1 ) then if( wntqn ) then ! path 1t (n >> m, jobz='n') ! no singular vectors to be computed itau = 1_${ik}$ nwork = itau + m ! compute a=l*q ! cworkspace: need m [tau] + m [work] ! cworkspace: prefer m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! zero out above l if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) nrwork = ie + m ! perform bidiagonal svd, compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 2t (n >> m, jobz='o') ! m right singular vectors to be overwritten on a and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ldwkvt = m ! work(ivt) is m by m il = ivt + ldwkvt*m if( lwork >= m*n + m*m + 3_${ik}$*m ) then ! work(il) m by n ldwrkl = m chunk = n else ! work(il) is m by chunk ldwrkl = m chunk = ( lwork - m*m - 3_${ik}$*m ) / m end if itau = il + ldwrkl*chunk nwork = itau + m ! compute a=l*q ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy l to work(il), zeroing about above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) ! generate q in a ! cworkspace: need m*m [vt] + m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_${ci}$gebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix work(iu) ! overwrite work(iu) by the left singular vectors of l ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix work(ivt) ! overwrite work(ivt) by the right singular vectors of l ! cworkspace: need m*m [vt] + m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & work( ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) ! multiply right singular vectors of l in work(il) by q ! in a, storing result in work(il) and copying to a ! cworkspace: need m*m [vt] + m*m [l] ! cworkspace: prefer m*m [vt] + m*n [l] ! rworkspace: need 0 do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,a( 1_${ik}$, i ), & lda, czero, work( il ),ldwrkl ) call stdlib${ii}$_${ci}$lacpy( 'F', m, blk, work( il ), ldwrkl,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 3t (n >> m, jobz='s') ! m right singular vectors to be computed in vt and ! m left singular vectors to be computed in u il = 1_${ik}$ ! work(il) is m by m ldwrkl = m itau = il + ldwrkl*m nwork = itau + m ! compute a=l*q ! cworkspace: need m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) ! copy l to work(il), zeroing out above it call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, work( il ), ldwrkl ) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero,work( il+ldwrkl ), ldwrkl ) ! generate q in a ! cworkspace: need m*m [l] + m [tau] + m [work] ! cworkspace: prefer m*m [l] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$unglq( m, n, m, a, lda, work( itau ),work( nwork ), lwork-nwork+& 1_${ik}$, ierr ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in work(il) ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_${ci}$gebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),work( itauq ), & work( itaup ), work( nwork ),lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of l ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,work( itauq ), & u, ldu, work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by left singular vectors of l ! cworkspace: need m*m [l] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [l] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,work( itaup ), & vt, ldvt, work( nwork ),lwork-nwork+1, ierr ) ! copy vt to work(il), multiply right singular vectors of l ! in work(il) by q in a, storing result in vt ! cworkspace: need m*m [l] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,a, lda, czero, & vt, ldvt ) else if( wntqa ) then ! path 4t (n >> m, jobz='a') ! n right singular vectors to be computed in vt and ! m left singular vectors to be computed in u ivt = 1_${ik}$ ! work(ivt) is m by m ldwkvt = m itau = ivt + ldwkvt*m nwork = itau + m ! compute a=l*q, copying result to vt ! cworkspace: need m*m [vt] + m [tau] + m [work] ! cworkspace: prefer m*m [vt] + m [tau] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$gelqf( m, n, a, lda, work( itau ), work( nwork ),lwork-nwork+1, & ierr ) call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) ! generate q in vt ! cworkspace: need m*m [vt] + m [tau] + n [work] ! cworkspace: prefer m*m [vt] + m [tau] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$unglq( n, n, m, vt, ldvt, work( itau ),work( nwork ), lwork-& nwork+1, ierr ) ! produce l in a, zeroing out above it if (m>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', m-1, m-1, czero, czero, a( 1_${ik}$, 2_${ik}$ ),lda ) ie = 1_${ik}$ itauq = itau itaup = itauq + m nwork = itaup + m ! bidiagonalize l in a ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + 2*m*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_${ci}$gebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),work( itaup ),& work( nwork ), lwork-nwork+1,ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [ru] + m*m [rvt] + bdspac iru = ie + m irvt = iru + m*m nrwork = irvt + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of l ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, m, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix work(ivt) ! overwrite work(ivt) by right singular vectors of l ! cworkspace: need m*m [vt] + 2*m [tauq, taup] + m [work] ! cworkspace: prefer m*m [vt] + 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, m, m, a, lda,work( itaup ), work( ivt ),& ldwkvt,work( nwork ), lwork-nwork+1, ierr ) ! multiply right singular vectors of l in work(ivt) by ! q in vt, storing result in a ! cworkspace: need m*m [vt] ! rworkspace: need 0 call stdlib${ii}$_${ci}$gemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,vt, ldvt, & czero, a, lda ) ! copy right singular vectors of a from a to vt call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, vt, ldvt ) end if else if( n>=mnthr2 ) then ! mnthr2 <= n < mnthr1 ! path 5t (n >> m, but not as much as mnthr1) ! reduce to bidiagonal form without qr decomposition, use ! stdlib${ii}$_${ci}$ungbr and matrix multiplication to compute singular vectors ie = 1_${ik}$ nrwork = ie + m itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 5tn (n >> m, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then irvt = nrwork iru = irvt + m*m nrwork = iru + m*m ivt = nwork ! path 5to (n >> m, jobz='o') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! generate p**h in a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), lwork-& nwork+1, ierr ) ldwkvt = m if( lwork >= m*n + 3_${ik}$*m ) then ! work( ivt ) is m by n nwork = ivt + ldwkvt*n chunk = n else ! work( ivt ) is m by chunk chunk = ( lwork - 3_${ik}$*m ) / m nwork = ivt + ldwkvt*chunk end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(irvt,KIND=${ck}$) ! storing the result in work(ivt), copying to u ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_${ci}$lacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),ldwkvt, rwork( & nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu ) ! multiply rwork(irvt) by p**h in a, storing the ! result in work(ivt), copying to a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ci}$larcm( m, blk, rwork( irvt ), m, a( 1_${ik}$, i ), lda,work( ivt ), & ldwkvt, rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1_${ik}$, i ), lda ) end do else if( wntqs ) then ! path 5ts (n >> m, jobz='s') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! copy a to vt, generate p**h ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_${ci}$lacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, m, a, lda, u, ldu ) ! multiply realmatrix rwork(irvt,KIND=${ck}$) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru call stdlib${ii}$_${ci}$larcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, vt, ldvt ) else ! path 5ta (n >> m, jobz='a') ! copy a to u, generate q ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'L', m, m, a, lda, u, ldu ) call stdlib${ii}$_${ci}$ungbr( 'Q', m, m, n, u, ldu, work( itauq ),work( nwork ), lwork-& nwork+1, ierr ) ! copy a to vt, generate p**h ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$lacpy( 'U', m, n, a, lda, vt, ldvt ) call stdlib${ii}$_${ci}$ungbr( 'P', n, n, m, vt, ldvt, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! multiply q in u by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in a, copying to u ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + 2*m*m [rwork] call stdlib${ii}$_${ci}$lacrm( m, m, u, ldu, rwork( iru ), m, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, m, a, lda, u, ldu ) ! multiply realmatrix rwork(irvt,KIND=${ck}$) by p**h in vt, ! storing the result in a, copying to vt ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru call stdlib${ii}$_${ci}$larcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, n, a, lda, vt, ldvt ) end if else ! n < mnthr2 ! path 6t (n > m, but not much larger) ! reduce to bidiagonal form without lq decomposition ! use stdlib_${ci}$unmbr to compute singular vectors ie = 1_${ik}$ nrwork = ie + m itauq = 1_${ik}$ itaup = itauq + m nwork = itaup + m ! bidiagonalize a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + (m+n)*nb [work] ! rworkspace: need m [e] call stdlib${ii}$_${ci}$gebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),work( itaup ), & work( nwork ), lwork-nwork+1,ierr ) if( wntqn ) then ! path 6tn (n > m, jobz='n') ! compute singular values only ! cworkspace: need 0 ! rworkspace: need m [e] + bdspac call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'N', m, s, rwork( ie ), dum,1_${ik}$,dum,1_${ik}$,dum, idum, rwork(& nrwork ), iwork, info ) else if( wntqo ) then ! path 6to (n > m, jobz='o') ldwkvt = m ivt = nwork if( lwork >= m*n + 3_${ik}$*m ) then ! work( ivt ) is m by n call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, work( ivt ),ldwkvt ) nwork = ivt + ldwkvt*n else ! work( ivt ) is m by chunk chunk = ( lwork - 3_${ik}$*m ) / m nwork = ivt + ldwkvt*chunk end if ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) if( lwork >= m*n + 3_${ik}$*m ) then ! path 6to-fast ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix work(ivt) ! overwrite work(ivt) by right singular vectors of a, ! copying to a ! cworkspace: need 2*m [tauq, taup] + m*n [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),ldwkvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), work( & ivt ), ldwkvt,work( nwork ), lwork-nwork+1, ierr ) call stdlib${ii}$_${ci}$lacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda ) else ! path 6to-slow ! generate p**h in a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*m [vt] + m*nb [work] ! rworkspace: need 0 call stdlib${ii}$_${ci}$ungbr( 'P', m, n, m, a, lda, work( itaup ),work( nwork ), & lwork-nwork+1, ierr ) ! multiply q in a by realmatrix rwork(iru,KIND=${ck}$), storing the ! result in work(iu), copying to a ! cworkspace: need 2*m [tauq, taup] + m*m [vt] ! cworkspace: prefer 2*m [tauq, taup] + m*n [vt] ! rworkspace: need m [e] + m*m [rvt] + 2*m*m [rwork] ! rworkspace: prefer m [e] + m*m [rvt] + 2*m*n [rwork] < m + 5*m*m since n < 2*m here nrwork = iru do i = 1, n, chunk blk = min( n-i+1, chunk ) call stdlib${ii}$_${ci}$larcm( m, blk, rwork( irvt ), m, a( 1_${ik}$, i ),lda, work( ivt )& , ldwkvt,rwork( nrwork ) ) call stdlib${ii}$_${ci}$lacpy( 'F', m, blk, work( ivt ), ldwkvt,a( 1_${ik}$, i ), lda ) end do end if else if( wntqs ) then ! path 6ts (n > m, jobz='s') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, vt, ldvt ) call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', m, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) else ! path 6ta (n > m, jobz='a') ! perform bidiagonal svd, computing left singular vectors ! of bidiagonal matrix in rwork(iru) and computing right ! singular vectors of bidiagonal matrix in rwork(irvt) ! cworkspace: need 0 ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] + bdspac irvt = nrwork iru = irvt + m*m nrwork = iru + m*m call stdlib${ii}$_${c2ri(ci)}$bdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),m, rwork( irvt )& , m, dum, idum,rwork( nrwork ), iwork, info ) ! copy realmatrix rwork(iru,KIND=${ck}$) to complex matrix u ! overwrite u by left singular vectors of a ! cworkspace: need 2*m [tauq, taup] + m [work] ! cworkspace: prefer 2*m [tauq, taup] + m*nb [work] ! rworkspace: need m [e] + m*m [rvt] + m*m [ru] call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( iru ), m, u, ldu ) call stdlib${ii}$_${ci}$unmbr( 'Q', 'L', 'N', m, m, n, a, lda,work( itauq ), u, ldu, & work( nwork ),lwork-nwork+1, ierr ) ! set all of vt to identity matrix call stdlib${ii}$_${ci}$laset( 'F', n, n, czero, cone, vt, ldvt ) ! copy realmatrix rwork(irvt,KIND=${ck}$) to complex matrix vt ! overwrite vt by right singular vectors of a ! cworkspace: need 2*m [tauq, taup] + n [work] ! cworkspace: prefer 2*m [tauq, taup] + n*nb [work] ! rworkspace: need m [e] + m*m [rvt] call stdlib${ii}$_${ci}$lacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt ) call stdlib${ii}$_${ci}$unmbr( 'P', 'R', 'C', n, n, m, a, lda,work( itaup ), vt, ldvt, & work( nwork ),lwork-nwork+1, ierr ) end if end if end if ! undo scaling if necessary if( iscl==1_${ik}$ ) then if( anrm>bignum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) if( anrm<smlnum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,& ierr ) if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,& 1_${ik}$,rwork( ie ), minmn, ierr ) end if ! return optimal workspace in work(1) work( 1_${ik}$ ) = stdlib${ii}$_${c2ri(ci)}$roundup_lwork( maxwrk ) return end subroutine stdlib${ii}$_${ci}$gesdd #:endif #:endfor pure module subroutine stdlib${ii}$_sgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & !! SGEJSV computes the singular value decomposition (SVD) of a real M-by-N !! matrix [A], where M >= N. The SVD of [A] is written as !! [A] = [U] * [SIGMA] * [V]^t, !! where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N !! diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and !! [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are !! the singular values of [A]. The columns of [U] and [V] are the left and !! the right singular vectors of [A], respectively. The matrices [U] and [V] !! are computed and stored in the arrays U and V, respectively. The diagonal !! of [SIGMA] is computed and stored in the array SVA. !! SGEJSV can sometimes compute tiny singular values and their singular vectors much !! more accurately than other SVD routines, see below under Further Details. v, ldv,work, lwork, iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldv, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: sva(n), u(ldu,*), v(ldv,*), work(lwork) integer(${ik}$), intent(out) :: iwork(*) character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv ! =========================================================================== ! Local Scalars real(sp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc integer(${ik}$) :: ierr, n1, nr, numrank, p, q, warning logical(lk) :: almort, defr, errest, goscal, jracc, kill, lsvec, l2aber, l2kill, & l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp ! Intrinsic Functions ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) jracc = stdlib_lsame( jobv, 'J' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) l2rank = stdlib_lsame( joba, 'R' ) l2aber = stdlib_lsame( joba, 'A' ) errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) l2tran = stdlib_lsame( jobt, 'T' ) l2kill = stdlib_lsame( jobr, 'R' ) defr = stdlib_lsame( jobr, 'N' ) l2pert = stdlib_lsame( jobp, 'P' ) if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & then info = - 1_${ik}$ else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.stdlib_lsame( jobu, 'W' )) )& then info = - 2_${ik}$ else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.stdlib_lsame( jobv, 'W' )) & .or. ( jracc .and. (.not.lsvec) ) ) then info = - 3_${ik}$ else if ( .not. ( l2kill .or. defr ) ) then info = - 4_${ik}$ else if ( .not. ( l2tran .or. stdlib_lsame( jobt, 'N' ) ) ) then info = - 5_${ik}$ else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then info = - 6_${ik}$ else if ( m < 0_${ik}$ ) then info = - 7_${ik}$ else if ( ( n < 0_${ik}$ ) .or. ( n > m ) ) then info = - 8_${ik}$ else if ( lda < m ) then info = - 10_${ik}$ else if ( lsvec .and. ( ldu < m ) ) then info = - 13_${ik}$ else if ( rsvec .and. ( ldv < n ) ) then info = - 15_${ik}$ else if ( (.not.(lsvec .or. rsvec .or. errest).and.(lwork < max(7_${ik}$,4_${ik}$*n+1,2_${ik}$*m+n))) .or.(& .not.(lsvec .or. rsvec) .and. errest .and.(lwork < max(7_${ik}$,4_${ik}$*n+n*n,2_${ik}$*m+n))) .or.(lsvec & .and. (.not.rsvec) .and. (lwork < max(7_${ik}$,2_${ik}$*m+n,4_${ik}$*n+1))).or.(rsvec .and. (.not.lsvec) & .and. (lwork < max(7_${ik}$,2_${ik}$*m+n,4_${ik}$*n+1))).or.(lsvec .and. rsvec .and. (.not.jracc) .and.(& lwork<max(2_${ik}$*m+n,6_${ik}$*n+2*n*n))).or. (lsvec .and. rsvec .and. jracc .and.lwork<max(2_${ik}$*m+n,& 4_${ik}$*n+n*n,2_${ik}$*n+n*n+6)))then info = - 17_${ik}$ else ! #:) info = 0_${ik}$ end if if ( info /= 0_${ik}$ ) then ! #:( call stdlib${ii}$_xerbla( 'SGEJSV', - info ) return end if ! quick return for void matrix (y3k safe) ! #:) if ( ( m == 0_${ik}$ ) .or. ( n == 0_${ik}$ ) ) then iwork(1_${ik}$:3_${ik}$) = 0_${ik}$ work(1_${ik}$:7_${ik}$) = 0_${ik}$ return endif ! determine whether the matrix u should be m x n or m x m if ( lsvec ) then n1 = n if ( stdlib_lsame( jobu, 'F' ) ) n1 = m end if ! set numerical parameters ! ! note: make sure stdlib${ii}$_slamch() does not fail on the target architecture. epsln = stdlib${ii}$_slamch('EPSILON') sfmin = stdlib${ii}$_slamch('SAFEMINIMUM') small = sfmin / epsln big = stdlib${ii}$_slamch('O') ! big = one / sfmin ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n ! (!) if necessary, scale sva() to protect the largest norm from ! overflow. it is possible that this scaling pushes the smallest ! column norm left from the underflow threshold (extreme case). scalem = one / sqrt(real(m,KIND=sp)*real(n,KIND=sp)) noscal = .true. goscal = .true. do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_slassq( m, a(1_${ik}$,p), 1_${ik}$, aapp, aaqq ) if ( aapp > big ) then info = - 9_${ik}$ call stdlib${ii}$_xerbla( 'SGEJSV', -info ) return end if aaqq = sqrt(aaqq) if ( ( aapp < (big / aaqq) ) .and. noscal ) then sva(p) = aapp * aaqq else noscal = .false. sva(p) = aapp * ( aaqq * scalem ) if ( goscal ) then goscal = .false. call stdlib${ii}$_sscal( p-1, scalem, sva, 1_${ik}$ ) end if end if end do if ( noscal ) scalem = one aapp = zero aaqq = big do p = 1, n aapp = max( aapp, sva(p) ) if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) end do ! quick return for zero m x n matrix ! #:) if ( aapp == zero ) then if ( lsvec ) call stdlib${ii}$_slaset( 'G', m, n1, zero, one, u, ldu ) if ( rsvec ) call stdlib${ii}$_slaset( 'G', n, n, zero, one, v, ldv ) work(1_${ik}$) = one work(2_${ik}$) = one if ( errest ) work(3_${ik}$) = one if ( lsvec .and. rsvec ) then work(4_${ik}$) = one work(5_${ik}$) = one end if if ( l2tran ) then work(6_${ik}$) = zero work(7_${ik}$) = zero end if iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ iwork(3_${ik}$) = 0_${ik}$ return end if ! issue warning if denormalized column norms detected. override the ! high relative accuracy request. issue licence to kill columns ! (set them to zero) whose norm is less than sigma_max / big (roughly). ! #:( warning = 0_${ik}$ if ( aaqq <= sfmin ) then l2rank = .true. l2kill = .true. warning = 1_${ik}$ end if ! quick return for one-column matrix ! #:) if ( n == 1_${ik}$ ) then if ( lsvec ) then call stdlib${ii}$_slascl( 'G',0_${ik}$,0_${ik}$,sva(1_${ik}$),scalem, m,1_${ik}$,a(1_${ik}$,1_${ik}$),lda,ierr ) call stdlib${ii}$_slacpy( 'A', m, 1_${ik}$, a, lda, u, ldu ) ! computing all m left singular vectors of the m x 1 matrix if ( n1 /= n ) then call stdlib${ii}$_sgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr ) call stdlib${ii}$_sorgqr( m,n1,1_${ik}$, u,ldu,work,work(n+1),lwork-n,ierr ) call stdlib${ii}$_scopy( m, a(1_${ik}$,1_${ik}$), 1_${ik}$, u(1_${ik}$,1_${ik}$), 1_${ik}$ ) end if end if if ( rsvec ) then v(1_${ik}$,1_${ik}$) = one end if if ( sva(1_${ik}$) < (big*scalem) ) then sva(1_${ik}$) = sva(1_${ik}$) / scalem scalem = one end if work(1_${ik}$) = one / scalem work(2_${ik}$) = one if ( sva(1_${ik}$) /= zero ) then iwork(1_${ik}$) = 1_${ik}$ if ( ( sva(1_${ik}$) / scalem) >= sfmin ) then iwork(2_${ik}$) = 1_${ik}$ else iwork(2_${ik}$) = 0_${ik}$ end if else iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ end if iwork(3_${ik}$) = 0_${ik}$ if ( errest ) work(3_${ik}$) = one if ( lsvec .and. rsvec ) then work(4_${ik}$) = one work(5_${ik}$) = one end if if ( l2tran ) then work(6_${ik}$) = zero work(7_${ik}$) = zero end if return end if transp = .false. l2tran = l2tran .and. ( m == n ) aatmax = -one aatmin = big if ( rowpiv .or. l2tran ) then ! compute the row norms, needed to determine row pivoting sequence ! (in the case of heavily row weighted a, row pivoting is strongly ! advised) and to collect information needed to compare the ! structures of a * a^t and a^t * a (in the case l2tran==.true.). if ( l2tran ) then do p = 1, m xsc = zero temp1 = one call stdlib${ii}$_slassq( n, a(p,1_${ik}$), lda, xsc, temp1 ) ! stdlib${ii}$_slassq gets both the ell_2 and the ell_infinity norm ! in one pass through the vector work(m+n+p) = xsc * scalem work(n+p) = xsc * (scalem*sqrt(temp1)) aatmax = max( aatmax, work(n+p) ) if (work(n+p) /= zero) aatmin = min(aatmin,work(n+p)) end do else do p = 1, m work(m+n+p) = scalem*abs( a(p,stdlib${ii}$_isamax(n,a(p,1_${ik}$),lda)) ) aatmax = max( aatmax, work(m+n+p) ) aatmin = min( aatmin, work(m+n+p) ) end do end if end if ! for square matrix a try to determine whether a^t would be better ! input for the preconditioned jacobi svd, with faster convergence. ! the decision is based on an o(n) function of the vector of column ! and row norms of a, based on the shannon entropy. this should give ! the right choice in most cases when the difference actually matters. ! it may fail and pick the slower converging side. entra = zero entrat = zero if ( l2tran ) then xsc = zero temp1 = one call stdlib${ii}$_slassq( n, sva, 1_${ik}$, xsc, temp1 ) temp1 = one / temp1 entra = zero do p = 1, n big1 = ( ( sva(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entra = entra + big1 * log(big1) end do entra = - entra / log(real(n,KIND=sp)) ! now, sva().^2/trace(a^t * a) is a point in the probability simplex. ! it is derived from the diagonal of a^t * a. do the same with the ! diagonal of a * a^t, compute the entropy of the corresponding ! probability distribution. note that a * a^t and a^t * a have the ! same trace. entrat = zero do p = n+1, n+m big1 = ( ( work(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entrat = entrat + big1 * log(big1) end do entrat = - entrat / log(real(m,KIND=sp)) ! analyze the entropies and decide a or a^t. smaller entropy ! usually means better input for the algorithm. transp = ( entrat < entra ) ! if a^t is better than a, transpose a. if ( transp ) then ! in an optimal implementation, this trivial transpose ! should be replaced with faster transpose. do p = 1, n - 1 do q = p + 1, n temp1 = a(q,p) a(q,p) = a(p,q) a(p,q) = temp1 end do end do do p = 1, n work(m+n+p) = sva(p) sva(p) = work(n+p) end do temp1 = aapp aapp = aatmax aatmax = temp1 temp1 = aaqq aaqq = aatmin aatmin = temp1 kill = lsvec lsvec = rsvec rsvec = kill if ( lsvec ) n1 = n rowpiv = .true. end if end if ! end if l2tran ! scale the matrix so that its maximal singular value remains less ! than sqrt(big) -- the matrix is scaled so that its maximal column ! has euclidean norm equal to sqrt(big/n). the only reason to keep ! sqrt(big) instead of big is the fact that stdlib${ii}$_sgejsv uses lapack and ! blas routines that, in some implementations, are not capable of ! working in the full interval [sfmin,big] and that they may provoke ! overflows in the intermediate results. if the singular values spread ! from sfmin to big, then stdlib${ii}$_sgesvj will compute them. so, in that case, ! one should use stdlib_sgesvj instead of stdlib${ii}$_sgejsv. big1 = sqrt( big ) temp1 = sqrt( big / real(n,KIND=sp) ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, n, 1_${ik}$, sva, n, ierr ) if ( aaqq > (aapp * sfmin) ) then aaqq = ( aaqq / aapp ) * temp1 else aaqq = ( aaqq * temp1 ) / aapp end if temp1 = temp1 * scalem call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, m, n, a, lda, ierr ) ! to undo scaling at the end of this procedure, multiply the ! computed singular values with uscal2 / uscal1. uscal1 = temp1 uscal2 = aapp if ( l2kill ) then ! l2kill enforces computation of nonzero singular values in ! the restricted range of condition number of the initial a, ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). xsc = sqrt( sfmin ) else xsc = small ! now, if the condition number of a is too big, ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, ! as a precaution measure, the full svd is computed using stdlib${ii}$_sgesvj ! with accumulated jacobi rotations. this provides numerically ! more robust computation, at the cost of slightly increased run ! time. depending on the concrete implementation of blas and lapack ! (i.e. how they behave in presence of extreme ill-conditioning) the ! implementor may decide to remove this switch. if ( ( aaqq<sqrt(sfmin) ) .and. lsvec .and. rsvec ) then jracc = .true. end if end if if ( aaqq < xsc ) then do p = 1, n if ( sva(p) < xsc ) then call stdlib${ii}$_slaset( 'A', m, 1_${ik}$, zero, zero, a(1_${ik}$,p), lda ) sva(p) = zero end if end do end if ! preconditioning using qr factorization with pivoting if ( rowpiv ) then ! optional row permutation (bjoerck row pivoting): ! a result by cox and higham shows that the bjoerck's ! row pivoting combined with standard column pivoting ! has similar effect as powell-reid complete pivoting. ! the ell-infinity norms of a are made nonincreasing. do p = 1, m - 1 q = stdlib${ii}$_isamax( m-p+1, work(m+n+p), 1_${ik}$ ) + p - 1_${ik}$ iwork(2_${ik}$*n+p) = q if ( p /= q ) then temp1 = work(m+n+p) work(m+n+p) = work(m+n+q) work(m+n+q) = temp1 end if end do call stdlib${ii}$_slaswp( n, a, lda, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), 1_${ik}$ ) end if ! end of the preparation phase (scaling, optional sorting and ! transposing, optional flushing of small columns). ! preconditioning ! if the full svd is needed, the right singular vectors are computed ! from a matrix equation, and for that we need theoretical analysis ! of the businger-golub pivoting. so we use stdlib_sgeqp3 as the first rr qrf. ! in all other cases the first rr qrf can be chosen by other criteria ! (eg speed by replacing global with restricted window pivoting, such ! as in sgeqpx from toms # 782). good results will be obtained using ! sgeqpx with properly (!) chosen numerical parameters. ! any improvement of stdlib${ii}$_sgeqp3 improves overall performance of stdlib${ii}$_sgejsv. ! a * p1 = q1 * [ r1^t 0]^t: do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_sgeqp3( m,n,a,lda, iwork,work, work(n+1),lwork-n, ierr ) ! the upper triangular matrix r1 from the first qrf is inspected for ! rank deficiency and possibilities for deflation, or possible ! ill-conditioning. depending on the user specified flag l2rank, ! the procedure explores possibilities to reduce the numerical ! rank by inspecting the computed upper triangular factor. if ! l2rank or l2aber are up, then stdlib${ii}$_sgejsv will compute the svd of ! a + da, where ||da|| <= f(m,n)*epsln. nr = 1_${ik}$ if ( l2aber ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*epsln*||a|| are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*epsln*||a||. temp1 = sqrt(real(n,KIND=sp))*epsln do p = 2, n if ( abs(a(p,p)) >= (temp1*abs(a(1_${ik}$,1_${ik}$))) ) then nr = nr + 1_${ik}$ else go to 3002 end if end do 3002 continue else if ( l2rank ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r1 is used as the criterion for ! close-to-rank-deficient. temp1 = sqrt(sfmin) do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3402 nr = nr + 1_${ik}$ end do 3402 continue else ! the goal is high relative accuracy. however, if the matrix ! has high scaled condition number the relative accuracy is in ! general not feasible. later on, a condition number estimator ! will be deployed to estimate the scaled condition number. ! here we just remove the underflowed part of the triangular ! factor. this prevents the situation in which the code is ! working hard to get the accuracy not warranted by the data. temp1 = sqrt(sfmin) do p = 2, n if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) go to 3302 nr = nr + 1_${ik}$ end do 3302 continue end if almort = .false. if ( nr == n ) then maxprj = one do p = 2, n temp1 = abs(a(p,p)) / sva(iwork(p)) maxprj = min( maxprj, temp1 ) end do if ( maxprj**2_${ik}$ >= one - real(n,KIND=sp)*epsln ) almort = .true. end if sconda = - one condr1 = - one condr2 = - one if ( errest ) then if ( n == nr ) then if ( rsvec ) then ! V Is Available As Workspace call stdlib${ii}$_slacpy( 'U', n, n, a, lda, v, ldv ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_sscal( p, one/temp1, v(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_spocon( 'U', n, v, ldv, one, temp1,work(n+1), iwork(2_${ik}$*n+m+1), & ierr ) else if ( lsvec ) then ! U Is Available As Workspace call stdlib${ii}$_slacpy( 'U', n, n, a, lda, u, ldu ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_sscal( p, one/temp1, u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_spocon( 'U', n, u, ldu, one, temp1,work(n+1), iwork(2_${ik}$*n+m+1), & ierr ) else call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work(n+1), n ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_sscal( p, one/temp1, work(n+(p-1)*n+1), 1_${ik}$ ) end do ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths call stdlib${ii}$_spocon( 'U', n, work(n+1), n, one, temp1,work(n+n*n+1), iwork(2_${ik}$*n+& m+1), ierr ) end if sconda = one / sqrt(temp1) ! sconda is an estimate of sqrt(||(r^t * r)^(-1)||_1). ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda else sconda = - one end if end if l2pert = l2pert .and. ( abs( a(1_${ik}$,1_${ik}$)/a(nr,nr) ) > sqrt(big1) ) ! if there is no violent scaling, artificial perturbation is not needed. ! phase 3: if ( .not. ( rsvec .or. lsvec ) ) then ! singular values only ! .. transpose a(1:nr,1:n) do p = 1, min( n-1, nr ) call stdlib${ii}$_scopy( n-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) end do ! the following two do-loops introduce small relative perturbation ! into the strict upper triangle of the lower triangular matrix. ! small entries below the main diagonal are also changed. ! this modification is useful if the computing environment does not ! provide/allow flush to zero underflow, for it prevents many ! annoying denormalized numbers in case of strongly scaled matrices. ! the perturbation is structured so that it does not introduce any ! new perturbation of the singular values, and it does not destroy ! the job done by the preconditioner. ! the licence for this perturbation is in the variable l2pert, which ! should be .false. if flush to zero underflow is active. if ( .not. almort ) then if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=sp) do q = 1, nr temp1 = xsc*abs(a(q,q)) do p = 1, n if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & temp1, a(p,q) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'U', nr-1,nr-1, zero,zero, a(1_${ik}$,2_${ik}$),lda ) end if ! Second Preconditioning Using The Qr Factorization call stdlib${ii}$_sgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr ) ! And Transpose Upper To Lower Triangular do p = 1, nr - 1 call stdlib${ii}$_scopy( nr-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) end do end if ! row-cyclic jacobi svd algorithm with column pivoting ! .. again some perturbation (a "background noise") is added ! to drown denormals if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=sp) do q = 1, nr temp1 = xsc*abs(a(q,q)) do p = 1, nr if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & temp1, a(p,q) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'U', nr-1, nr-1, zero, zero, a(1_${ik}$,2_${ik}$), lda ) end if ! .. and one-sided jacobi rotations are started on a lower ! triangular matrix (plus perturbation which is ignored in ! the part which destroys triangular form (confusing?!)) call stdlib${ii}$_sgesvj( 'L', 'NOU', 'NOV', nr, nr, a, lda, sva,n, v, ldv, work, & lwork, info ) scalem = work(1_${ik}$) numrank = nint(work(2_${ik}$),KIND=${ik}$) else if ( rsvec .and. ( .not. lsvec ) ) then ! -> singular values and right singular vectors <- if ( almort ) then ! In This Case Nr Equals N do p = 1, nr call stdlib${ii}$_scopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_sgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,work, lwork, info ) scalem = work(1_${ik}$) numrank = nint(work(2_${ik}$),KIND=${ik}$) else ! .. two more qr factorizations ( one qrf is not enough, two require ! accumulated product of jacobi rotations, three are perfect ) if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'LOWER', nr-1, nr-1, zero, zero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_sgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr) call stdlib${ii}$_slacpy( 'LOWER', nr, nr, a, lda, v, ldv ) if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_sgeqrf( nr, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr call stdlib${ii}$_scopy( nr-p+1, v(p,p), ldv, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_sgesvj( 'LOWER', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, work(n+1), & lwork-n, info ) scalem = work(n+1) numrank = nint(work(n+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_slaset( 'A',n-nr, nr, zero,zero, v(nr+1,1_${ik}$), ldv ) call stdlib${ii}$_slaset( 'A',nr, n-nr, zero,zero, v(1_${ik}$,nr+1), ldv ) call stdlib${ii}$_slaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv ) end if call stdlib${ii}$_sormlq( 'LEFT', 'TRANSPOSE', n, n, nr, a, lda, work,v, ldv, work(n+1), & lwork-n, ierr ) end if do p = 1, n call stdlib${ii}$_scopy( n, v(p,1_${ik}$), ldv, a(iwork(p),1_${ik}$), lda ) end do call stdlib${ii}$_slacpy( 'ALL', n, n, a, lda, v, ldv ) if ( transp ) then call stdlib${ii}$_slacpy( 'ALL', n, n, v, ldv, u, ldu ) end if else if ( lsvec .and. ( .not. rsvec ) ) then ! Singular Values And Left Singular Vectors ! Second Preconditioning Step To Avoid Need To Accumulate ! jacobi rotations in the jacobi iterations. do p = 1, nr call stdlib${ii}$_scopy( n-p+1, a(p,p), lda, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'UPPER', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_sgeqrf( n, nr, u, ldu, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr - 1 call stdlib${ii}$_scopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'UPPER', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_sgesvj( 'LOWER', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, work(n+1), & lwork-n, info ) scalem = work(n+1) numrank = nint(work(n+2),KIND=${ik}$) if ( nr < m ) then call stdlib${ii}$_slaset( 'A', m-nr, nr,zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_slaset( 'A',nr, n1-nr, zero, zero, u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if call stdlib${ii}$_sormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) if ( rowpiv )call stdlib${ii}$_slaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) do p = 1, n1 xsc = one / stdlib${ii}$_snrm2( m, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_sscal( m, xsc, u(1_${ik}$,p), 1_${ik}$ ) end do if ( transp ) then call stdlib${ii}$_slacpy( 'ALL', n, n, u, ldu, v, ldv ) end if else ! Full Svd if ( .not. jracc ) then if ( .not. almort ) then ! second preconditioning step (qrf [with pivoting]) ! note that the composition of transpose, qrf and transpose is ! equivalent to an lqf call. since in many libraries the qrf ! seems to be better optimized than the lqf, we do explicit ! transpose and use the qrf. this is subject to changes in an ! optimized implementation of stdlib${ii}$_sgejsv. do p = 1, nr call stdlib${ii}$_scopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do ! The Following Two Loops Perturb Small Entries To Avoid ! denormals in the second qr factorization, where they are ! as good as zeros. this is done to avoid painfully slow ! computation with denormals. the relative size of the perturbation ! is a parameter that can be changed by the implementer. ! this perturbation device will be obsolete on machines with ! properly implemented arithmetic. ! to switch it off, set l2pert=.false. to remove it from the ! code, remove the action under l2pert=.true., leave the else part. ! the following two loops should be blocked and fused with the ! transposed copy above. if ( l2pert ) then xsc = sqrt(small) do q = 1, nr temp1 = xsc*abs( v(q,q) ) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & sign( temp1, v(p,q) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'U', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) end if ! estimate the row scaled condition number of r1 ! (if r1 is rectangular, n > nr, then the condition number ! of the leading nr x nr submatrix is estimated.) call stdlib${ii}$_slacpy( 'L', nr, nr, v, ldv, work(2_${ik}$*n+1), nr ) do p = 1, nr temp1 = stdlib${ii}$_snrm2(nr-p+1,work(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) call stdlib${ii}$_sscal(nr-p+1,one/temp1,work(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) end do call stdlib${ii}$_spocon('LOWER',nr,work(2_${ik}$*n+1),nr,one,temp1,work(2_${ik}$*n+nr*nr+1),iwork(m+& 2_${ik}$*n+1),ierr) condr1 = one / sqrt(temp1) ! Here Need A Second Opinion On The Condition Number ! Then Assume Worst Case Scenario ! r1 is ok for inverse <=> condr1 < real(n,KIND=sp) ! more conservative <=> condr1 < sqrt(real(n,KIND=sp)) cond_ok = sqrt(real(nr,KIND=sp)) ! [tp] cond_ok is a tuning parameter. if ( condr1 < cond_ok ) then ! .. the second qrf without pivoting. note: in an optimized ! implementation, this qrf should be implemented as the qrf ! of a lower triangular matrix. ! r1^t = q2 * r2 call stdlib${ii}$_sgeqrf( n, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small)/epsln do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) end do end do end if if ( nr /= n )call stdlib${ii}$_slacpy( 'A', n, nr, v, ldv, work(2_${ik}$*n+1), n ) ! .. save ... ! This Transposed Copy Should Be Better Than Naive do p = 1, nr - 1 call stdlib${ii}$_scopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1_${ik}$ ) end do condr2 = condr1 else ! .. ill-conditioned case: second qrf with pivoting ! note that windowed pivoting would be equally good ! numerically, and more run-time efficient. so, in ! an optimal implementation, the next call to stdlib${ii}$_sgeqp3 ! should be replaced with eg. call sgeqpx (acm toms #782) ! with properly (carefully) chosen parameters. ! r1^t * p2 = q2 * r2 do p = 1, nr iwork(n+p) = 0_${ik}$ end do call stdlib${ii}$_sgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),work(2_${ik}$*n+1), lwork-& 2_${ik}$*n, ierr ) ! * call stdlib${ii}$_sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1), ! * $ lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) end do end do end if call stdlib${ii}$_slacpy( 'A', n, nr, v, ldv, work(2_${ik}$*n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) v(p,q) = - sign( temp1, v(q,p) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'L',nr-1,nr-1,zero,zero,v(2_${ik}$,1_${ik}$),ldv ) end if ! now, compute r2 = l3 * q3, the lq factorization. call stdlib${ii}$_sgelqf( nr, nr, v, ldv, work(2_${ik}$*n+n*nr+1),work(2_${ik}$*n+n*nr+nr+1), & lwork-2*n-n*nr-nr, ierr ) ! And Estimate The Condition Number call stdlib${ii}$_slacpy( 'L',nr,nr,v,ldv,work(2_${ik}$*n+n*nr+nr+1),nr ) do p = 1, nr temp1 = stdlib${ii}$_snrm2( p, work(2_${ik}$*n+n*nr+nr+p), nr ) call stdlib${ii}$_sscal( p, one/temp1, work(2_${ik}$*n+n*nr+nr+p), nr ) end do call stdlib${ii}$_spocon( 'L',nr,work(2_${ik}$*n+n*nr+nr+1),nr,one,temp1,work(2_${ik}$*n+n*nr+nr+& nr*nr+1),iwork(m+2*n+1),ierr ) condr2 = one / sqrt(temp1) if ( condr2 >= cond_ok ) then ! Save The Householder Vectors Used For Q3 ! (this overwrites the copy of r2, as it will not be ! needed in this branch, but it does not overwritte the ! huseholder vectors of q2.). call stdlib${ii}$_slacpy( 'U', nr, nr, v, ldv, work(2_${ik}$*n+1), n ) ! And The Rest Of The Information On Q3 Is In ! work(2*n+n*nr+1:2*n+n*nr+n) end if end if if ( l2pert ) then xsc = sqrt(small) do q = 2, nr temp1 = xsc * v(q,q) do p = 1, q - 1 ! v(p,q) = - sign( temp1, v(q,p) ) v(p,q) = - sign( temp1, v(p,q) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) end if ! second preconditioning finished; continue with jacobi svd ! the input matrix is lower trinagular. ! recover the right singular vectors as solution of a well ! conditioned triangular matrix equation. if ( condr1 < cond_ok ) then call stdlib${ii}$_sgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,ldu,work(2_${ik}$*n+n*nr+nr+1),& lwork-2*n-n*nr-nr,info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_scopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_sscal( nr, sva(p), v(1_${ik}$,p), 1_${ik}$ ) end do ! Pick The Right Matrix Equation And Solve It if ( nr == n ) then ! :)) .. best case, r1 is inverted. the solution of this matrix ! equation is q2*v2 = the product of the jacobi rotations ! used in stdlib${ii}$_sgesvj, premultiplied with the orthogonal matrix ! from the second qr factorization. call stdlib${ii}$_strsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv ) else ! .. r1 is well conditioned, but non-square. transpose(r2) ! is inverted to get the product of the jacobi rotations ! used in stdlib${ii}$_sgesvj. the q-factor from the second qr ! factorization is then built in explicitly. call stdlib${ii}$_strsm('L','U','T','N',nr,nr,one,work(2_${ik}$*n+1),n,v,ldv) if ( nr < n ) then call stdlib${ii}$_slaset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_slaset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_sormqr('L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) end if else if ( condr2 < cond_ok ) then ! :) .. the input matrix a is very likely a relative of ! the kahan matrix :) ! the matrix r2 is inverted. the solution of the matrix equation ! is q3^t*v3 = the product of the jacobi rotations (appplied to ! the lower triangular l3 from the lq factorization of ! r2=l3*q3), pre-multiplied with the transposed q3. call stdlib${ii}$_sgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, work(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr, info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_scopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_sscal( nr, sva(p), u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_strsm('L','U','N','N',nr,nr,one,work(2_${ik}$*n+1),n,u,ldu) ! Apply The Permutation From The Second Qr Factorization do q = 1, nr do p = 1, nr work(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do end do if ( nr < n ) then call stdlib${ii}$_slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_slaset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_sormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) else ! last line of defense. ! #:( this is a rather pathological case: no scaled condition ! improvement after two pivoted qr factorizations. other ! possibility is that the rank revealing qr factorization ! or the condition estimator has failed, or the cond_ok ! is set very close to one (which is unnecessary). normally, ! this branch should never be executed, but in rare cases of ! failure of the rrqr or condition estimator, the last line of ! defense ensures that stdlib${ii}$_sgejsv completes the task. ! compute the full svd of l3 using stdlib${ii}$_sgesvj with explicit ! accumulation of jacobi rotations. call stdlib${ii}$_sgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, work(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr, info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_slaset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_sormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) call stdlib${ii}$_sormlq( 'L', 'T', nr, nr, nr, work(2_${ik}$*n+1), n,work(2_${ik}$*n+n*nr+1), u, & ldu, work(2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) do q = 1, nr do p = 1, nr work(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do end do end if ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=sp)) * epsln do q = 1, n do p = 1, n work(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_snrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_sscal( n, xsc, & v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_slaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_slaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) end if end if ! the q matrix from the first qrf is built into the left singular ! matrix u. this applies to all cases. call stdlib${ii}$_sormqr( 'LEFT', 'NO_TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) ! the columns of u are normalized. the cost is o(m*n) flops. temp1 = sqrt(real(m,KIND=sp)) * epsln do p = 1, nr xsc = one / stdlib${ii}$_snrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_sscal( m, xsc, & u(1_${ik}$,p), 1_${ik}$ ) end do ! if the initial qrf is computed with row pivoting, the left ! singular vectors must be adjusted. if ( rowpiv )call stdlib${ii}$_slaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) else ! The Initial Matrix A Has Almost Orthogonal Columns And ! the second qrf is not needed call stdlib${ii}$_slacpy( 'UPPER', n, n, a, lda, work(n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, n temp1 = xsc * work( n + (p-1)*n + p ) do q = 1, p - 1 work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q)) end do end do else call stdlib${ii}$_slaset( 'LOWER',n-1,n-1,zero,zero,work(n+2),n ) end if call stdlib${ii}$_sgesvj( 'UPPER', 'U', 'N', n, n, work(n+1), n, sva,n, u, ldu, work(n+& n*n+1), lwork-n-n*n, info ) scalem = work(n+n*n+1) numrank = nint(work(n+n*n+2),KIND=${ik}$) do p = 1, n call stdlib${ii}$_scopy( n, work(n+(p-1)*n+1), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_sscal( n, sva(p), work(n+(p-1)*n+1), 1_${ik}$ ) end do call stdlib${ii}$_strsm( 'LEFT', 'UPPER', 'NOTRANS', 'NO UD', n, n,one, a, lda, work(n+& 1_${ik}$), n ) do p = 1, n call stdlib${ii}$_scopy( n, work(n+p), n, v(iwork(p),1_${ik}$), ldv ) end do temp1 = sqrt(real(n,KIND=sp))*epsln do p = 1, n xsc = one / stdlib${ii}$_snrm2( n, v(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_sscal( n, xsc, & v(1_${ik}$,p), 1_${ik}$ ) end do ! assemble the left singular vector matrix u (m x n). if ( n < m ) then call stdlib${ii}$_slaset( 'A', m-n, n, zero, zero, u(n+1,1_${ik}$), ldu ) if ( n < n1 ) then call stdlib${ii}$_slaset( 'A',n, n1-n, zero, zero, u(1_${ik}$,n+1),ldu ) call stdlib${ii}$_slaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu ) end if end if call stdlib${ii}$_sormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) temp1 = sqrt(real(m,KIND=sp))*epsln do p = 1, n1 xsc = one / stdlib${ii}$_snrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_sscal( m, xsc, & u(1_${ik}$,p), 1_${ik}$ ) end do if ( rowpiv )call stdlib${ii}$_slaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) end if ! end of the >> almost orthogonal case << in the full svd else ! this branch deploys a preconditioned jacobi svd with explicitly ! accumulated rotations. it is included as optional, mainly for ! experimental purposes. it does perform well, and can also be used. ! in this implementation, this branch will be automatically activated ! if the condition number sigma_max(a) / sigma_min(a) is predicted ! to be greater than the overflow threshold. this is because the ! a posteriori computation of the singular vectors assumes robust ! implementation of blas and some lapack procedures, capable of working ! in presence of extreme values. since that is not always the case, ... do p = 1, nr call stdlib${ii}$_scopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 1, nr temp1 = xsc*abs( v(q,q) ) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = sign(& temp1, v(p,q) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset( 'U', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) end if call stdlib${ii}$_sgeqrf( n, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) call stdlib${ii}$_slacpy( 'L', n, nr, v, ldv, work(2_${ik}$*n+1), n ) do p = 1, nr call stdlib${ii}$_scopy( nr-p+1, v(p,p), ldv, u(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 2, nr do p = 1, q - 1 temp1 = xsc * min(abs(u(p,p)),abs(u(q,q))) u(p,q) = - sign( temp1, u(q,p) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_slaset('U', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) end if call stdlib${ii}$_sgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, work(2_${ik}$*n+n*nr+1), & lwork-2*n-n*nr, info ) scalem = work(2_${ik}$*n+n*nr+1) numrank = nint(work(2_${ik}$*n+n*nr+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_slaset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_sormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+n*nr+nr+1)& ,lwork-2*n-n*nr-nr,ierr ) ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=sp)) * epsln do q = 1, n do p = 1, n work(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_snrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_sscal( n, xsc, & v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_slaset( 'A',nr, n1-nr, zero, zero, u(1_${ik}$,nr+1),ldu ) call stdlib${ii}$_slaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu ) end if end if call stdlib${ii}$_sormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) if ( rowpiv )call stdlib${ii}$_slaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) end if if ( transp ) then ! .. swap u and v because the procedure worked on a^t do p = 1, n call stdlib${ii}$_sswap( n, u(1_${ik}$,p), 1_${ik}$, v(1_${ik}$,p), 1_${ik}$ ) end do end if end if ! end of the full svd ! undo scaling, if necessary (and possible) if ( uscal2 <= (big/sva(1_${ik}$))*uscal1 ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, uscal1, uscal2, nr, 1_${ik}$, sva, n, ierr ) uscal1 = one uscal2 = one end if if ( nr < n ) then do p = nr+1, n sva(p) = zero end do end if work(1_${ik}$) = uscal2 * scalem work(2_${ik}$) = uscal1 if ( errest ) work(3_${ik}$) = sconda if ( lsvec .and. rsvec ) then work(4_${ik}$) = condr1 work(5_${ik}$) = condr2 end if if ( l2tran ) then work(6_${ik}$) = entra work(7_${ik}$) = entrat end if iwork(1_${ik}$) = nr iwork(2_${ik}$) = numrank iwork(3_${ik}$) = warning return end subroutine stdlib${ii}$_sgejsv pure module subroutine stdlib${ii}$_dgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & !! DGEJSV computes the singular value decomposition (SVD) of a real M-by-N !! matrix [A], where M >= N. The SVD of [A] is written as !! [A] = [U] * [SIGMA] * [V]^t, !! where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N !! diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and !! [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are !! the singular values of [A]. The columns of [U] and [V] are the left and !! the right singular vectors of [A], respectively. The matrices [U] and [V] !! are computed and stored in the arrays U and V, respectively. The diagonal !! of [SIGMA] is computed and stored in the array SVA. !! DGEJSV can sometimes compute tiny singular values and their singular vectors much !! more accurately than other SVD routines, see below under Further Details. v, ldv,work, lwork, iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldv, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: sva(n), u(ldu,*), v(ldv,*), work(lwork) integer(${ik}$), intent(out) :: iwork(*) character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv ! =========================================================================== ! Local Scalars real(dp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc integer(${ik}$) :: ierr, n1, nr, numrank, p, q, warning logical(lk) :: almort, defr, errest, goscal, jracc, kill, lsvec, l2aber, l2kill, & l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp ! Intrinsic Functions ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) jracc = stdlib_lsame( jobv, 'J' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) l2rank = stdlib_lsame( joba, 'R' ) l2aber = stdlib_lsame( joba, 'A' ) errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) l2tran = stdlib_lsame( jobt, 'T' ) l2kill = stdlib_lsame( jobr, 'R' ) defr = stdlib_lsame( jobr, 'N' ) l2pert = stdlib_lsame( jobp, 'P' ) if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & then info = - 1_${ik}$ else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.stdlib_lsame( jobu, 'W' )) )& then info = - 2_${ik}$ else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.stdlib_lsame( jobv, 'W' )) & .or. ( jracc .and. (.not.lsvec) ) ) then info = - 3_${ik}$ else if ( .not. ( l2kill .or. defr ) ) then info = - 4_${ik}$ else if ( .not. ( l2tran .or. stdlib_lsame( jobt, 'N' ) ) ) then info = - 5_${ik}$ else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then info = - 6_${ik}$ else if ( m < 0_${ik}$ ) then info = - 7_${ik}$ else if ( ( n < 0_${ik}$ ) .or. ( n > m ) ) then info = - 8_${ik}$ else if ( lda < m ) then info = - 10_${ik}$ else if ( lsvec .and. ( ldu < m ) ) then info = - 13_${ik}$ else if ( rsvec .and. ( ldv < n ) ) then info = - 15_${ik}$ else if ( (.not.(lsvec .or. rsvec .or. errest).and.(lwork < max(7_${ik}$,4_${ik}$*n+1,2_${ik}$*m+n))) .or.(& .not.(lsvec .or. rsvec) .and. errest .and.(lwork < max(7_${ik}$,4_${ik}$*n+n*n,2_${ik}$*m+n))) .or.(lsvec & .and. (.not.rsvec) .and. (lwork < max(7_${ik}$,2_${ik}$*m+n,4_${ik}$*n+1))).or.(rsvec .and. (.not.lsvec) & .and. (lwork < max(7_${ik}$,2_${ik}$*m+n,4_${ik}$*n+1))).or.(lsvec .and. rsvec .and. (.not.jracc) .and.(& lwork<max(2_${ik}$*m+n,6_${ik}$*n+2*n*n))).or. (lsvec .and. rsvec .and. jracc .and.lwork<max(2_${ik}$*m+n,& 4_${ik}$*n+n*n,2_${ik}$*n+n*n+6)))then info = - 17_${ik}$ else ! #:) info = 0_${ik}$ end if if ( info /= 0_${ik}$ ) then ! #:( call stdlib${ii}$_xerbla( 'DGEJSV', - info ) return end if ! quick return for void matrix (y3k safe) ! #:) if ( ( m == 0_${ik}$ ) .or. ( n == 0_${ik}$ ) ) then iwork(1_${ik}$:3_${ik}$) = 0_${ik}$ work(1_${ik}$:7_${ik}$) = 0_${ik}$ return endif ! determine whether the matrix u should be m x n or m x m if ( lsvec ) then n1 = n if ( stdlib_lsame( jobu, 'F' ) ) n1 = m end if ! set numerical parameters ! ! note: make sure stdlib${ii}$_dlamch() does not fail on the target architecture. epsln = stdlib${ii}$_dlamch('EPSILON') sfmin = stdlib${ii}$_dlamch('SAFEMINIMUM') small = sfmin / epsln big = stdlib${ii}$_dlamch('O') ! big = one / sfmin ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n ! (!) if necessary, scale sva() to protect the largest norm from ! overflow. it is possible that this scaling pushes the smallest ! column norm left from the underflow threshold (extreme case). scalem = one / sqrt(real(m,KIND=dp)*real(n,KIND=dp)) noscal = .true. goscal = .true. do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_dlassq( m, a(1_${ik}$,p), 1_${ik}$, aapp, aaqq ) if ( aapp > big ) then info = - 9_${ik}$ call stdlib${ii}$_xerbla( 'DGEJSV', -info ) return end if aaqq = sqrt(aaqq) if ( ( aapp < (big / aaqq) ) .and. noscal ) then sva(p) = aapp * aaqq else noscal = .false. sva(p) = aapp * ( aaqq * scalem ) if ( goscal ) then goscal = .false. call stdlib${ii}$_dscal( p-1, scalem, sva, 1_${ik}$ ) end if end if end do if ( noscal ) scalem = one aapp = zero aaqq = big do p = 1, n aapp = max( aapp, sva(p) ) if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) end do ! quick return for zero m x n matrix ! #:) if ( aapp == zero ) then if ( lsvec ) call stdlib${ii}$_dlaset( 'G', m, n1, zero, one, u, ldu ) if ( rsvec ) call stdlib${ii}$_dlaset( 'G', n, n, zero, one, v, ldv ) work(1_${ik}$) = one work(2_${ik}$) = one if ( errest ) work(3_${ik}$) = one if ( lsvec .and. rsvec ) then work(4_${ik}$) = one work(5_${ik}$) = one end if if ( l2tran ) then work(6_${ik}$) = zero work(7_${ik}$) = zero end if iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ iwork(3_${ik}$) = 0_${ik}$ return end if ! issue warning if denormalized column norms detected. override the ! high relative accuracy request. issue licence to kill columns ! (set them to zero) whose norm is less than sigma_max / big (roughly). ! #:( warning = 0_${ik}$ if ( aaqq <= sfmin ) then l2rank = .true. l2kill = .true. warning = 1_${ik}$ end if ! quick return for one-column matrix ! #:) if ( n == 1_${ik}$ ) then if ( lsvec ) then call stdlib${ii}$_dlascl( 'G',0_${ik}$,0_${ik}$,sva(1_${ik}$),scalem, m,1_${ik}$,a(1_${ik}$,1_${ik}$),lda,ierr ) call stdlib${ii}$_dlacpy( 'A', m, 1_${ik}$, a, lda, u, ldu ) ! computing all m left singular vectors of the m x 1 matrix if ( n1 /= n ) then call stdlib${ii}$_dgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr ) call stdlib${ii}$_dorgqr( m,n1,1_${ik}$, u,ldu,work,work(n+1),lwork-n,ierr ) call stdlib${ii}$_dcopy( m, a(1_${ik}$,1_${ik}$), 1_${ik}$, u(1_${ik}$,1_${ik}$), 1_${ik}$ ) end if end if if ( rsvec ) then v(1_${ik}$,1_${ik}$) = one end if if ( sva(1_${ik}$) < (big*scalem) ) then sva(1_${ik}$) = sva(1_${ik}$) / scalem scalem = one end if work(1_${ik}$) = one / scalem work(2_${ik}$) = one if ( sva(1_${ik}$) /= zero ) then iwork(1_${ik}$) = 1_${ik}$ if ( ( sva(1_${ik}$) / scalem) >= sfmin ) then iwork(2_${ik}$) = 1_${ik}$ else iwork(2_${ik}$) = 0_${ik}$ end if else iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ end if iwork(3_${ik}$) = 0_${ik}$ if ( errest ) work(3_${ik}$) = one if ( lsvec .and. rsvec ) then work(4_${ik}$) = one work(5_${ik}$) = one end if if ( l2tran ) then work(6_${ik}$) = zero work(7_${ik}$) = zero end if return end if transp = .false. l2tran = l2tran .and. ( m == n ) aatmax = -one aatmin = big if ( rowpiv .or. l2tran ) then ! compute the row norms, needed to determine row pivoting sequence ! (in the case of heavily row weighted a, row pivoting is strongly ! advised) and to collect information needed to compare the ! structures of a * a^t and a^t * a (in the case l2tran==.true.). if ( l2tran ) then do p = 1, m xsc = zero temp1 = one call stdlib${ii}$_dlassq( n, a(p,1_${ik}$), lda, xsc, temp1 ) ! stdlib${ii}$_dlassq gets both the ell_2 and the ell_infinity norm ! in one pass through the vector work(m+n+p) = xsc * scalem work(n+p) = xsc * (scalem*sqrt(temp1)) aatmax = max( aatmax, work(n+p) ) if (work(n+p) /= zero) aatmin = min(aatmin,work(n+p)) end do else do p = 1, m work(m+n+p) = scalem*abs( a(p,stdlib${ii}$_idamax(n,a(p,1_${ik}$),lda)) ) aatmax = max( aatmax, work(m+n+p) ) aatmin = min( aatmin, work(m+n+p) ) end do end if end if ! for square matrix a try to determine whether a^t would be better ! input for the preconditioned jacobi svd, with faster convergence. ! the decision is based on an o(n) function of the vector of column ! and row norms of a, based on the shannon entropy. this should give ! the right choice in most cases when the difference actually matters. ! it may fail and pick the slower converging side. entra = zero entrat = zero if ( l2tran ) then xsc = zero temp1 = one call stdlib${ii}$_dlassq( n, sva, 1_${ik}$, xsc, temp1 ) temp1 = one / temp1 entra = zero do p = 1, n big1 = ( ( sva(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entra = entra + big1 * log(big1) end do entra = - entra / log(real(n,KIND=dp)) ! now, sva().^2/trace(a^t * a) is a point in the probability simplex. ! it is derived from the diagonal of a^t * a. do the same with the ! diagonal of a * a^t, compute the entropy of the corresponding ! probability distribution. note that a * a^t and a^t * a have the ! same trace. entrat = zero do p = n+1, n+m big1 = ( ( work(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entrat = entrat + big1 * log(big1) end do entrat = - entrat / log(real(m,KIND=dp)) ! analyze the entropies and decide a or a^t. smaller entropy ! usually means better input for the algorithm. transp = ( entrat < entra ) ! if a^t is better than a, transpose a. if ( transp ) then ! in an optimal implementation, this trivial transpose ! should be replaced with faster transpose. do p = 1, n - 1 do q = p + 1, n temp1 = a(q,p) a(q,p) = a(p,q) a(p,q) = temp1 end do end do do p = 1, n work(m+n+p) = sva(p) sva(p) = work(n+p) end do temp1 = aapp aapp = aatmax aatmax = temp1 temp1 = aaqq aaqq = aatmin aatmin = temp1 kill = lsvec lsvec = rsvec rsvec = kill if ( lsvec ) n1 = n rowpiv = .true. end if end if ! end if l2tran ! scale the matrix so that its maximal singular value remains less ! than sqrt(big) -- the matrix is scaled so that its maximal column ! has euclidean norm equal to sqrt(big/n). the only reason to keep ! sqrt(big) instead of big is the fact that stdlib${ii}$_dgejsv uses lapack and ! blas routines that, in some implementations, are not capable of ! working in the full interval [sfmin,big] and that they may provoke ! overflows in the intermediate results. if the singular values spread ! from sfmin to big, then stdlib${ii}$_dgesvj will compute them. so, in that case, ! one should use stdlib_dgesvj instead of stdlib${ii}$_dgejsv. big1 = sqrt( big ) temp1 = sqrt( big / real(n,KIND=dp) ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, n, 1_${ik}$, sva, n, ierr ) if ( aaqq > (aapp * sfmin) ) then aaqq = ( aaqq / aapp ) * temp1 else aaqq = ( aaqq * temp1 ) / aapp end if temp1 = temp1 * scalem call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, m, n, a, lda, ierr ) ! to undo scaling at the end of this procedure, multiply the ! computed singular values with uscal2 / uscal1. uscal1 = temp1 uscal2 = aapp if ( l2kill ) then ! l2kill enforces computation of nonzero singular values in ! the restricted range of condition number of the initial a, ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). xsc = sqrt( sfmin ) else xsc = small ! now, if the condition number of a is too big, ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, ! as a precaution measure, the full svd is computed using stdlib${ii}$_dgesvj ! with accumulated jacobi rotations. this provides numerically ! more robust computation, at the cost of slightly increased run ! time. depending on the concrete implementation of blas and lapack ! (i.e. how they behave in presence of extreme ill-conditioning) the ! implementor may decide to remove this switch. if ( ( aaqq<sqrt(sfmin) ) .and. lsvec .and. rsvec ) then jracc = .true. end if end if if ( aaqq < xsc ) then do p = 1, n if ( sva(p) < xsc ) then call stdlib${ii}$_dlaset( 'A', m, 1_${ik}$, zero, zero, a(1_${ik}$,p), lda ) sva(p) = zero end if end do end if ! preconditioning using qr factorization with pivoting if ( rowpiv ) then ! optional row permutation (bjoerck row pivoting): ! a result by cox and higham shows that the bjoerck's ! row pivoting combined with standard column pivoting ! has similar effect as powell-reid complete pivoting. ! the ell-infinity norms of a are made nonincreasing. do p = 1, m - 1 q = stdlib${ii}$_idamax( m-p+1, work(m+n+p), 1_${ik}$ ) + p - 1_${ik}$ iwork(2_${ik}$*n+p) = q if ( p /= q ) then temp1 = work(m+n+p) work(m+n+p) = work(m+n+q) work(m+n+q) = temp1 end if end do call stdlib${ii}$_dlaswp( n, a, lda, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), 1_${ik}$ ) end if ! end of the preparation phase (scaling, optional sorting and ! transposing, optional flushing of small columns). ! preconditioning ! if the full svd is needed, the right singular vectors are computed ! from a matrix equation, and for that we need theoretical analysis ! of the businger-golub pivoting. so we use stdlib_dgeqp3 as the first rr qrf. ! in all other cases the first rr qrf can be chosen by other criteria ! (eg speed by replacing global with restricted window pivoting, such ! as in sgeqpx from toms # 782). good results will be obtained using ! sgeqpx with properly (!) chosen numerical parameters. ! any improvement of stdlib${ii}$_dgeqp3 improves overall performance of stdlib${ii}$_dgejsv. ! a * p1 = q1 * [ r1^t 0]^t: do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_dgeqp3( m,n,a,lda, iwork,work, work(n+1),lwork-n, ierr ) ! the upper triangular matrix r1 from the first qrf is inspected for ! rank deficiency and possibilities for deflation, or possible ! ill-conditioning. depending on the user specified flag l2rank, ! the procedure explores possibilities to reduce the numerical ! rank by inspecting the computed upper triangular factor. if ! l2rank or l2aber are up, then stdlib${ii}$_dgejsv will compute the svd of ! a + da, where ||da|| <= f(m,n)*epsln. nr = 1_${ik}$ if ( l2aber ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*epsln*||a|| are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*epsln*||a||. temp1 = sqrt(real(n,KIND=dp))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) >= (temp1*abs(a(1_${ik}$,1_${ik}$))) ) then nr = nr + 1_${ik}$ else exit loop_3002 end if end do loop_3002 else if ( l2rank ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r1 is used as the criterion for ! close-to-rank-deficient. temp1 = sqrt(sfmin) loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! the goal is high relative accuracy. however, if the matrix ! has high scaled condition number the relative accuracy is in ! general not feasible. later on, a condition number estimator ! will be deployed to estimate the scaled condition number. ! here we just remove the underflowed part of the triangular ! factor. this prevents the situation in which the code is ! working hard to get the accuracy not warranted by the data. temp1 = sqrt(sfmin) loop_3302: do p = 2, n if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3302 nr = nr + 1_${ik}$ end do loop_3302 end if almort = .false. if ( nr == n ) then maxprj = one do p = 2, n temp1 = abs(a(p,p)) / sva(iwork(p)) maxprj = min( maxprj, temp1 ) end do if ( maxprj**2_${ik}$ >= one - real(n,KIND=dp)*epsln ) almort = .true. end if sconda = - one condr1 = - one condr2 = - one if ( errest ) then if ( n == nr ) then if ( rsvec ) then ! V Is Available As Workspace call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, v, ldv ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_dscal( p, one/temp1, v(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_dpocon( 'U', n, v, ldv, one, temp1,work(n+1), iwork(2_${ik}$*n+m+1), & ierr ) else if ( lsvec ) then ! U Is Available As Workspace call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, u, ldu ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_dscal( p, one/temp1, u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_dpocon( 'U', n, u, ldu, one, temp1,work(n+1), iwork(2_${ik}$*n+m+1), & ierr ) else call stdlib${ii}$_dlacpy( 'U', n, n, a, lda, work(n+1), n ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_dscal( p, one/temp1, work(n+(p-1)*n+1), 1_${ik}$ ) end do ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths call stdlib${ii}$_dpocon( 'U', n, work(n+1), n, one, temp1,work(n+n*n+1), iwork(2_${ik}$*n+& m+1), ierr ) end if sconda = one / sqrt(temp1) ! sconda is an estimate of sqrt(||(r^t * r)^(-1)||_1). ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda else sconda = - one end if end if l2pert = l2pert .and. ( abs( a(1_${ik}$,1_${ik}$)/a(nr,nr) ) > sqrt(big1) ) ! if there is no violent scaling, artificial perturbation is not needed. ! phase 3: if ( .not. ( rsvec .or. lsvec ) ) then ! singular values only ! .. transpose a(1:nr,1:n) do p = 1, min( n-1, nr ) call stdlib${ii}$_dcopy( n-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) end do ! the following two do-loops introduce small relative perturbation ! into the strict upper triangle of the lower triangular matrix. ! small entries below the main diagonal are also changed. ! this modification is useful if the computing environment does not ! provide/allow flush to zero underflow, for it prevents many ! annoying denormalized numbers in case of strongly scaled matrices. ! the perturbation is structured so that it does not introduce any ! new perturbation of the singular values, and it does not destroy ! the job done by the preconditioner. ! the licence for this perturbation is in the variable l2pert, which ! should be .false. if flush to zero underflow is active. if ( .not. almort ) then if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=dp) do q = 1, nr temp1 = xsc*abs(a(q,q)) do p = 1, n if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & temp1, a(p,q) ) end do end do else call stdlib${ii}$_dlaset( 'U', nr-1,nr-1, zero,zero, a(1_${ik}$,2_${ik}$),lda ) end if ! Second Preconditioning Using The Qr Factorization call stdlib${ii}$_dgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr ) ! And Transpose Upper To Lower Triangular do p = 1, nr - 1 call stdlib${ii}$_dcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) end do end if ! row-cyclic jacobi svd algorithm with column pivoting ! .. again some perturbation (a "background noise") is added ! to drown denormals if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=dp) do q = 1, nr temp1 = xsc*abs(a(q,q)) do p = 1, nr if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & temp1, a(p,q) ) end do end do else call stdlib${ii}$_dlaset( 'U', nr-1, nr-1, zero, zero, a(1_${ik}$,2_${ik}$), lda ) end if ! .. and one-sided jacobi rotations are started on a lower ! triangular matrix (plus perturbation which is ignored in ! the part which destroys triangular form (confusing?!)) call stdlib${ii}$_dgesvj( 'L', 'NOU', 'NOV', nr, nr, a, lda, sva,n, v, ldv, work, & lwork, info ) scalem = work(1_${ik}$) numrank = nint(work(2_${ik}$),KIND=${ik}$) else if ( rsvec .and. ( .not. lsvec ) ) then ! -> singular values and right singular vectors <- if ( almort ) then ! In This Case Nr Equals N do p = 1, nr call stdlib${ii}$_dcopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do call stdlib${ii}$_dlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_dgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,work, lwork, info ) scalem = work(1_${ik}$) numrank = nint(work(2_${ik}$),KIND=${ik}$) else ! .. two more qr factorizations ( one qrf is not enough, two require ! accumulated product of jacobi rotations, three are perfect ) call stdlib${ii}$_dlaset( 'LOWER', nr-1, nr-1, zero, zero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_dgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr) call stdlib${ii}$_dlacpy( 'LOWER', nr, nr, a, lda, v, ldv ) call stdlib${ii}$_dlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_dgeqrf( nr, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr call stdlib${ii}$_dcopy( nr-p+1, v(p,p), ldv, v(p,p), 1_${ik}$ ) end do call stdlib${ii}$_dlaset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_dgesvj( 'LOWER', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, work(n+1), & lwork, info ) scalem = work(n+1) numrank = nint(work(n+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_dlaset( 'A',n-nr, nr, zero,zero, v(nr+1,1_${ik}$), ldv ) call stdlib${ii}$_dlaset( 'A',nr, n-nr, zero,zero, v(1_${ik}$,nr+1), ldv ) call stdlib${ii}$_dlaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv ) end if call stdlib${ii}$_dormlq( 'LEFT', 'TRANSPOSE', n, n, nr, a, lda, work,v, ldv, work(n+1), & lwork-n, ierr ) end if do p = 1, n call stdlib${ii}$_dcopy( n, v(p,1_${ik}$), ldv, a(iwork(p),1_${ik}$), lda ) end do call stdlib${ii}$_dlacpy( 'ALL', n, n, a, lda, v, ldv ) if ( transp ) then call stdlib${ii}$_dlacpy( 'ALL', n, n, v, ldv, u, ldu ) end if else if ( lsvec .and. ( .not. rsvec ) ) then ! Singular Values And Left Singular Vectors ! Second Preconditioning Step To Avoid Need To Accumulate ! jacobi rotations in the jacobi iterations. do p = 1, nr call stdlib${ii}$_dcopy( n-p+1, a(p,p), lda, u(p,p), 1_${ik}$ ) end do call stdlib${ii}$_dlaset( 'UPPER', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_dgeqrf( n, nr, u, ldu, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr - 1 call stdlib${ii}$_dcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1_${ik}$ ) end do call stdlib${ii}$_dlaset( 'UPPER', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_dgesvj( 'LOWER', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, work(n+1), & lwork-n, info ) scalem = work(n+1) numrank = nint(work(n+2),KIND=${ik}$) if ( nr < m ) then call stdlib${ii}$_dlaset( 'A', m-nr, nr,zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_dlaset( 'A',nr, n1-nr, zero, zero, u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if call stdlib${ii}$_dormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) if ( rowpiv )call stdlib${ii}$_dlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) do p = 1, n1 xsc = one / stdlib${ii}$_dnrm2( m, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_dscal( m, xsc, u(1_${ik}$,p), 1_${ik}$ ) end do if ( transp ) then call stdlib${ii}$_dlacpy( 'ALL', n, n, u, ldu, v, ldv ) end if else ! Full Svd if ( .not. jracc ) then if ( .not. almort ) then ! second preconditioning step (qrf [with pivoting]) ! note that the composition of transpose, qrf and transpose is ! equivalent to an lqf call. since in many libraries the qrf ! seems to be better optimized than the lqf, we do explicit ! transpose and use the qrf. this is subject to changes in an ! optimized implementation of stdlib${ii}$_dgejsv. do p = 1, nr call stdlib${ii}$_dcopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do ! The Following Two Loops Perturb Small Entries To Avoid ! denormals in the second qr factorization, where they are ! as good as zeros. this is done to avoid painfully slow ! computation with denormals. the relative size of the perturbation ! is a parameter that can be changed by the implementer. ! this perturbation device will be obsolete on machines with ! properly implemented arithmetic. ! to switch it off, set l2pert=.false. to remove it from the ! code, remove the action under l2pert=.true., leave the else part. ! the following two loops should be blocked and fused with the ! transposed copy above. if ( l2pert ) then xsc = sqrt(small) do q = 1, nr temp1 = xsc*abs( v(q,q) ) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & sign( temp1, v(p,q) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else call stdlib${ii}$_dlaset( 'U', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) end if ! estimate the row scaled condition number of r1 ! (if r1 is rectangular, n > nr, then the condition number ! of the leading nr x nr submatrix is estimated.) call stdlib${ii}$_dlacpy( 'L', nr, nr, v, ldv, work(2_${ik}$*n+1), nr ) do p = 1, nr temp1 = stdlib${ii}$_dnrm2(nr-p+1,work(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) call stdlib${ii}$_dscal(nr-p+1,one/temp1,work(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) end do call stdlib${ii}$_dpocon('LOWER',nr,work(2_${ik}$*n+1),nr,one,temp1,work(2_${ik}$*n+nr*nr+1),iwork(m+& 2_${ik}$*n+1),ierr) condr1 = one / sqrt(temp1) ! Here Need A Second Opinion On The Condition Number ! Then Assume Worst Case Scenario ! r1 is ok for inverse <=> condr1 < real(n,KIND=dp) ! more conservative <=> condr1 < sqrt(real(n,KIND=dp)) cond_ok = sqrt(real(nr,KIND=dp)) ! [tp] cond_ok is a tuning parameter. if ( condr1 < cond_ok ) then ! .. the second qrf without pivoting. note: in an optimized ! implementation, this qrf should be implemented as the qrf ! of a lower triangular matrix. ! r1^t = q2 * r2 call stdlib${ii}$_dgeqrf( n, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small)/epsln do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) end do end do end if if ( nr /= n )call stdlib${ii}$_dlacpy( 'A', n, nr, v, ldv, work(2_${ik}$*n+1), n ) ! .. save ... ! This Transposed Copy Should Be Better Than Naive do p = 1, nr - 1 call stdlib${ii}$_dcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1_${ik}$ ) end do condr2 = condr1 else ! .. ill-conditioned case: second qrf with pivoting ! note that windowed pivoting would be equally good ! numerically, and more run-time efficient. so, in ! an optimal implementation, the next call to stdlib${ii}$_dgeqp3 ! should be replaced with eg. call sgeqpx (acm toms #782) ! with properly (carefully) chosen parameters. ! r1^t * p2 = q2 * r2 do p = 1, nr iwork(n+p) = 0_${ik}$ end do call stdlib${ii}$_dgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),work(2_${ik}$*n+1), lwork-& 2_${ik}$*n, ierr ) ! * call stdlib${ii}$_dgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1), ! * $ lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) end do end do end if call stdlib${ii}$_dlacpy( 'A', n, nr, v, ldv, work(2_${ik}$*n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) v(p,q) = - sign( temp1, v(q,p) ) end do end do else call stdlib${ii}$_dlaset( 'L',nr-1,nr-1,zero,zero,v(2_${ik}$,1_${ik}$),ldv ) end if ! now, compute r2 = l3 * q3, the lq factorization. call stdlib${ii}$_dgelqf( nr, nr, v, ldv, work(2_${ik}$*n+n*nr+1),work(2_${ik}$*n+n*nr+nr+1), & lwork-2*n-n*nr-nr, ierr ) ! And Estimate The Condition Number call stdlib${ii}$_dlacpy( 'L',nr,nr,v,ldv,work(2_${ik}$*n+n*nr+nr+1),nr ) do p = 1, nr temp1 = stdlib${ii}$_dnrm2( p, work(2_${ik}$*n+n*nr+nr+p), nr ) call stdlib${ii}$_dscal( p, one/temp1, work(2_${ik}$*n+n*nr+nr+p), nr ) end do call stdlib${ii}$_dpocon( 'L',nr,work(2_${ik}$*n+n*nr+nr+1),nr,one,temp1,work(2_${ik}$*n+n*nr+nr+& nr*nr+1),iwork(m+2*n+1),ierr ) condr2 = one / sqrt(temp1) if ( condr2 >= cond_ok ) then ! Save The Householder Vectors Used For Q3 ! (this overwrites the copy of r2, as it will not be ! needed in this branch, but it does not overwritte the ! huseholder vectors of q2.). call stdlib${ii}$_dlacpy( 'U', nr, nr, v, ldv, work(2_${ik}$*n+1), n ) ! And The Rest Of The Information On Q3 Is In ! work(2*n+n*nr+1:2*n+n*nr+n) end if end if if ( l2pert ) then xsc = sqrt(small) do q = 2, nr temp1 = xsc * v(q,q) do p = 1, q - 1 ! v(p,q) = - sign( temp1, v(q,p) ) v(p,q) = - sign( temp1, v(p,q) ) end do end do else call stdlib${ii}$_dlaset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) end if ! second preconditioning finished; continue with jacobi svd ! the input matrix is lower trinagular. ! recover the right singular vectors as solution of a well ! conditioned triangular matrix equation. if ( condr1 < cond_ok ) then call stdlib${ii}$_dgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,ldu,work(2_${ik}$*n+n*nr+nr+1),& lwork-2*n-n*nr-nr,info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_dcopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_dscal( nr, sva(p), v(1_${ik}$,p), 1_${ik}$ ) end do ! Pick The Right Matrix Equation And Solve It if ( nr == n ) then ! :)) .. best case, r1 is inverted. the solution of this matrix ! equation is q2*v2 = the product of the jacobi rotations ! used in stdlib${ii}$_dgesvj, premultiplied with the orthogonal matrix ! from the second qr factorization. call stdlib${ii}$_dtrsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv ) else ! .. r1 is well conditioned, but non-square. transpose(r2) ! is inverted to get the product of the jacobi rotations ! used in stdlib${ii}$_dgesvj. the q-factor from the second qr ! factorization is then built in explicitly. call stdlib${ii}$_dtrsm('L','U','T','N',nr,nr,one,work(2_${ik}$*n+1),n,v,ldv) if ( nr < n ) then call stdlib${ii}$_dlaset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_dlaset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_dormqr('L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) end if else if ( condr2 < cond_ok ) then ! :) .. the input matrix a is very likely a relative of ! the kahan matrix :) ! the matrix r2 is inverted. the solution of the matrix equation ! is q3^t*v3 = the product of the jacobi rotations (appplied to ! the lower triangular l3 from the lq factorization of ! r2=l3*q3), pre-multiplied with the transposed q3. call stdlib${ii}$_dgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, work(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr, info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_dcopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_dscal( nr, sva(p), u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_dtrsm('L','U','N','N',nr,nr,one,work(2_${ik}$*n+1),n,u,ldu) ! Apply The Permutation From The Second Qr Factorization do q = 1, nr do p = 1, nr work(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do end do if ( nr < n ) then call stdlib${ii}$_dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_dlaset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_dormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) else ! last line of defense. ! #:( this is a rather pathological case: no scaled condition ! improvement after two pivoted qr factorizations. other ! possibility is that the rank revealing qr factorization ! or the condition estimator has failed, or the cond_ok ! is set very close to one (which is unnecessary). normally, ! this branch should never be executed, but in rare cases of ! failure of the rrqr or condition estimator, the last line of ! defense ensures that stdlib${ii}$_dgejsv completes the task. ! compute the full svd of l3 using stdlib${ii}$_dgesvj with explicit ! accumulation of jacobi rotations. call stdlib${ii}$_dgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, work(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr, info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_dlaset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_dormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) call stdlib${ii}$_dormlq( 'L', 'T', nr, nr, nr, work(2_${ik}$*n+1), n,work(2_${ik}$*n+n*nr+1), u, & ldu, work(2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) do q = 1, nr do p = 1, nr work(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do end do end if ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=dp)) * epsln do q = 1, n do p = 1, n work(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_dnrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_dscal( n, xsc, & v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_dlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_dlaset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_dlaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) end if end if ! the q matrix from the first qrf is built into the left singular ! matrix u. this applies to all cases. call stdlib${ii}$_dormqr( 'LEFT', 'NO_TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) ! the columns of u are normalized. the cost is o(m*n) flops. temp1 = sqrt(real(m,KIND=dp)) * epsln do p = 1, nr xsc = one / stdlib${ii}$_dnrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_dscal( m, xsc, & u(1_${ik}$,p), 1_${ik}$ ) end do ! if the initial qrf is computed with row pivoting, the left ! singular vectors must be adjusted. if ( rowpiv )call stdlib${ii}$_dlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) else ! The Initial Matrix A Has Almost Orthogonal Columns And ! the second qrf is not needed call stdlib${ii}$_dlacpy( 'UPPER', n, n, a, lda, work(n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, n temp1 = xsc * work( n + (p-1)*n + p ) do q = 1, p - 1 work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q)) end do end do else call stdlib${ii}$_dlaset( 'LOWER',n-1,n-1,zero,zero,work(n+2),n ) end if call stdlib${ii}$_dgesvj( 'UPPER', 'U', 'N', n, n, work(n+1), n, sva,n, u, ldu, work(n+& n*n+1), lwork-n-n*n, info ) scalem = work(n+n*n+1) numrank = nint(work(n+n*n+2),KIND=${ik}$) do p = 1, n call stdlib${ii}$_dcopy( n, work(n+(p-1)*n+1), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_dscal( n, sva(p), work(n+(p-1)*n+1), 1_${ik}$ ) end do call stdlib${ii}$_dtrsm( 'LEFT', 'UPPER', 'NOTRANS', 'NO UD', n, n,one, a, lda, work(n+& 1_${ik}$), n ) do p = 1, n call stdlib${ii}$_dcopy( n, work(n+p), n, v(iwork(p),1_${ik}$), ldv ) end do temp1 = sqrt(real(n,KIND=dp))*epsln do p = 1, n xsc = one / stdlib${ii}$_dnrm2( n, v(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_dscal( n, xsc, & v(1_${ik}$,p), 1_${ik}$ ) end do ! assemble the left singular vector matrix u (m x n). if ( n < m ) then call stdlib${ii}$_dlaset( 'A', m-n, n, zero, zero, u(n+1,1_${ik}$), ldu ) if ( n < n1 ) then call stdlib${ii}$_dlaset( 'A',n, n1-n, zero, zero, u(1_${ik}$,n+1),ldu ) call stdlib${ii}$_dlaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu ) end if end if call stdlib${ii}$_dormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) temp1 = sqrt(real(m,KIND=dp))*epsln do p = 1, n1 xsc = one / stdlib${ii}$_dnrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_dscal( m, xsc, & u(1_${ik}$,p), 1_${ik}$ ) end do if ( rowpiv )call stdlib${ii}$_dlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) end if ! end of the >> almost orthogonal case << in the full svd else ! this branch deploys a preconditioned jacobi svd with explicitly ! accumulated rotations. it is included as optional, mainly for ! experimental purposes. it does perform well, and can also be used. ! in this implementation, this branch will be automatically activated ! if the condition number sigma_max(a) / sigma_min(a) is predicted ! to be greater than the overflow threshold. this is because the ! a posteriori computation of the singular vectors assumes robust ! implementation of blas and some lapack procedures, capable of working ! in presence of extreme values. since that is not always the case, ... do p = 1, nr call stdlib${ii}$_dcopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 1, nr temp1 = xsc*abs( v(q,q) ) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = sign(& temp1, v(p,q) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else call stdlib${ii}$_dlaset( 'U', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) end if call stdlib${ii}$_dgeqrf( n, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) call stdlib${ii}$_dlacpy( 'L', n, nr, v, ldv, work(2_${ik}$*n+1), n ) do p = 1, nr call stdlib${ii}$_dcopy( nr-p+1, v(p,p), ldv, u(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 2, nr do p = 1, q - 1 temp1 = xsc * min(abs(u(p,p)),abs(u(q,q))) u(p,q) = - sign( temp1, u(q,p) ) end do end do else call stdlib${ii}$_dlaset('U', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) end if call stdlib${ii}$_dgesvj( 'G', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, work(2_${ik}$*n+n*nr+1), & lwork-2*n-n*nr, info ) scalem = work(2_${ik}$*n+n*nr+1) numrank = nint(work(2_${ik}$*n+n*nr+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_dlaset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_dormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+n*nr+nr+1)& ,lwork-2*n-n*nr-nr,ierr ) ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=dp)) * epsln do q = 1, n do p = 1, n work(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_dnrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_dscal( n, xsc, & v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_dlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_dlaset( 'A',nr, n1-nr, zero, zero, u(1_${ik}$,nr+1),ldu ) call stdlib${ii}$_dlaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu ) end if end if call stdlib${ii}$_dormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) if ( rowpiv )call stdlib${ii}$_dlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) end if if ( transp ) then ! .. swap u and v because the procedure worked on a^t do p = 1, n call stdlib${ii}$_dswap( n, u(1_${ik}$,p), 1_${ik}$, v(1_${ik}$,p), 1_${ik}$ ) end do end if end if ! end of the full svd ! undo scaling, if necessary (and possible) if ( uscal2 <= (big/sva(1_${ik}$))*uscal1 ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, uscal1, uscal2, nr, 1_${ik}$, sva, n, ierr ) uscal1 = one uscal2 = one end if if ( nr < n ) then do p = nr+1, n sva(p) = zero end do end if work(1_${ik}$) = uscal2 * scalem work(2_${ik}$) = uscal1 if ( errest ) work(3_${ik}$) = sconda if ( lsvec .and. rsvec ) then work(4_${ik}$) = condr1 work(5_${ik}$) = condr2 end if if ( l2tran ) then work(6_${ik}$) = entra work(7_${ik}$) = entrat end if iwork(1_${ik}$) = nr iwork(2_${ik}$) = numrank iwork(3_${ik}$) = warning return end subroutine stdlib${ii}$_dgejsv #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & !! DGEJSV: computes the singular value decomposition (SVD) of a real M-by-N !! matrix [A], where M >= N. The SVD of [A] is written as !! [A] = [U] * [SIGMA] * [V]^t, !! where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N !! diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and !! [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are !! the singular values of [A]. The columns of [U] and [V] are the left and !! the right singular vectors of [A], respectively. The matrices [U] and [V] !! are computed and stored in the arrays U and V, respectively. The diagonal !! of [SIGMA] is computed and stored in the array SVA. !! DGEJSV can sometimes compute tiny singular values and their singular vectors much !! more accurately than other SVD routines, see below under Further Details. v, ldv,work, lwork, iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldv, lwork, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: sva(n), u(ldu,*), v(ldv,*), work(lwork) integer(${ik}$), intent(out) :: iwork(*) character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv ! =========================================================================== ! Local Scalars real(${rk}$) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc integer(${ik}$) :: ierr, n1, nr, numrank, p, q, warning logical(lk) :: almort, defr, errest, goscal, jracc, kill, lsvec, l2aber, l2kill, & l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp ! Intrinsic Functions ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) jracc = stdlib_lsame( jobv, 'J' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) l2rank = stdlib_lsame( joba, 'R' ) l2aber = stdlib_lsame( joba, 'A' ) errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) l2tran = stdlib_lsame( jobt, 'T' ) l2kill = stdlib_lsame( jobr, 'R' ) defr = stdlib_lsame( jobr, 'N' ) l2pert = stdlib_lsame( jobp, 'P' ) if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & then info = - 1_${ik}$ else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.stdlib_lsame( jobu, 'W' )) )& then info = - 2_${ik}$ else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.stdlib_lsame( jobv, 'W' )) & .or. ( jracc .and. (.not.lsvec) ) ) then info = - 3_${ik}$ else if ( .not. ( l2kill .or. defr ) ) then info = - 4_${ik}$ else if ( .not. ( l2tran .or. stdlib_lsame( jobt, 'N' ) ) ) then info = - 5_${ik}$ else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then info = - 6_${ik}$ else if ( m < 0_${ik}$ ) then info = - 7_${ik}$ else if ( ( n < 0_${ik}$ ) .or. ( n > m ) ) then info = - 8_${ik}$ else if ( lda < m ) then info = - 10_${ik}$ else if ( lsvec .and. ( ldu < m ) ) then info = - 13_${ik}$ else if ( rsvec .and. ( ldv < n ) ) then info = - 15_${ik}$ else if ( (.not.(lsvec .or. rsvec .or. errest).and.(lwork < max(7_${ik}$,4_${ik}$*n+1,2_${ik}$*m+n))) .or.(& .not.(lsvec .or. rsvec) .and. errest .and.(lwork < max(7_${ik}$,4_${ik}$*n+n*n,2_${ik}$*m+n))) .or.(lsvec & .and. (.not.rsvec) .and. (lwork < max(7_${ik}$,2_${ik}$*m+n,4_${ik}$*n+1))).or.(rsvec .and. (.not.lsvec) & .and. (lwork < max(7_${ik}$,2_${ik}$*m+n,4_${ik}$*n+1))).or.(lsvec .and. rsvec .and. (.not.jracc) .and.(& lwork<max(2_${ik}$*m+n,6_${ik}$*n+2*n*n))).or. (lsvec .and. rsvec .and. jracc .and.lwork<max(2_${ik}$*m+n,& 4_${ik}$*n+n*n,2_${ik}$*n+n*n+6)))then info = - 17_${ik}$ else ! #:) info = 0_${ik}$ end if if ( info /= 0_${ik}$ ) then ! #:( call stdlib${ii}$_xerbla( 'DGEJSV', - info ) return end if ! quick return for void matrix (y3k safe) ! #:) if ( ( m == 0_${ik}$ ) .or. ( n == 0_${ik}$ ) ) then iwork(1_${ik}$:3_${ik}$) = 0_${ik}$ work(1_${ik}$:7_${ik}$) = 0_${ik}$ return endif ! determine whether the matrix u should be m x n or m x m if ( lsvec ) then n1 = n if ( stdlib_lsame( jobu, 'F' ) ) n1 = m end if ! set numerical parameters ! ! note: make sure stdlib${ii}$_${ri}$lamch() does not fail on the target architecture. epsln = stdlib${ii}$_${ri}$lamch('EPSILON') sfmin = stdlib${ii}$_${ri}$lamch('SAFEMINIMUM') small = sfmin / epsln big = stdlib${ii}$_${ri}$lamch('O') ! big = one / sfmin ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n ! (!) if necessary, scale sva() to protect the largest norm from ! overflow. it is possible that this scaling pushes the smallest ! column norm left from the underflow threshold (extreme case). scalem = one / sqrt(real(m,KIND=${rk}$)*real(n,KIND=${rk}$)) noscal = .true. goscal = .true. do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a(1_${ik}$,p), 1_${ik}$, aapp, aaqq ) if ( aapp > big ) then info = - 9_${ik}$ call stdlib${ii}$_xerbla( 'DGEJSV', -info ) return end if aaqq = sqrt(aaqq) if ( ( aapp < (big / aaqq) ) .and. noscal ) then sva(p) = aapp * aaqq else noscal = .false. sva(p) = aapp * ( aaqq * scalem ) if ( goscal ) then goscal = .false. call stdlib${ii}$_${ri}$scal( p-1, scalem, sva, 1_${ik}$ ) end if end if end do if ( noscal ) scalem = one aapp = zero aaqq = big do p = 1, n aapp = max( aapp, sva(p) ) if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) end do ! quick return for zero m x n matrix ! #:) if ( aapp == zero ) then if ( lsvec ) call stdlib${ii}$_${ri}$laset( 'G', m, n1, zero, one, u, ldu ) if ( rsvec ) call stdlib${ii}$_${ri}$laset( 'G', n, n, zero, one, v, ldv ) work(1_${ik}$) = one work(2_${ik}$) = one if ( errest ) work(3_${ik}$) = one if ( lsvec .and. rsvec ) then work(4_${ik}$) = one work(5_${ik}$) = one end if if ( l2tran ) then work(6_${ik}$) = zero work(7_${ik}$) = zero end if iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ iwork(3_${ik}$) = 0_${ik}$ return end if ! issue warning if denormalized column norms detected. override the ! high relative accuracy request. issue licence to kill columns ! (set them to zero) whose norm is less than sigma_max / big (roughly). ! #:( warning = 0_${ik}$ if ( aaqq <= sfmin ) then l2rank = .true. l2kill = .true. warning = 1_${ik}$ end if ! quick return for one-column matrix ! #:) if ( n == 1_${ik}$ ) then if ( lsvec ) then call stdlib${ii}$_${ri}$lascl( 'G',0_${ik}$,0_${ik}$,sva(1_${ik}$),scalem, m,1_${ik}$,a(1_${ik}$,1_${ik}$),lda,ierr ) call stdlib${ii}$_${ri}$lacpy( 'A', m, 1_${ik}$, a, lda, u, ldu ) ! computing all m left singular vectors of the m x 1 matrix if ( n1 /= n ) then call stdlib${ii}$_${ri}$geqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr ) call stdlib${ii}$_${ri}$orgqr( m,n1,1_${ik}$, u,ldu,work,work(n+1),lwork-n,ierr ) call stdlib${ii}$_${ri}$copy( m, a(1_${ik}$,1_${ik}$), 1_${ik}$, u(1_${ik}$,1_${ik}$), 1_${ik}$ ) end if end if if ( rsvec ) then v(1_${ik}$,1_${ik}$) = one end if if ( sva(1_${ik}$) < (big*scalem) ) then sva(1_${ik}$) = sva(1_${ik}$) / scalem scalem = one end if work(1_${ik}$) = one / scalem work(2_${ik}$) = one if ( sva(1_${ik}$) /= zero ) then iwork(1_${ik}$) = 1_${ik}$ if ( ( sva(1_${ik}$) / scalem) >= sfmin ) then iwork(2_${ik}$) = 1_${ik}$ else iwork(2_${ik}$) = 0_${ik}$ end if else iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ end if iwork(3_${ik}$) = 0_${ik}$ if ( errest ) work(3_${ik}$) = one if ( lsvec .and. rsvec ) then work(4_${ik}$) = one work(5_${ik}$) = one end if if ( l2tran ) then work(6_${ik}$) = zero work(7_${ik}$) = zero end if return end if transp = .false. l2tran = l2tran .and. ( m == n ) aatmax = -one aatmin = big if ( rowpiv .or. l2tran ) then ! compute the row norms, needed to determine row pivoting sequence ! (in the case of heavily row weighted a, row pivoting is strongly ! advised) and to collect information needed to compare the ! structures of a * a^t and a^t * a (in the case l2tran==.true.). if ( l2tran ) then do p = 1, m xsc = zero temp1 = one call stdlib${ii}$_${ri}$lassq( n, a(p,1_${ik}$), lda, xsc, temp1 ) ! stdlib${ii}$_${ri}$lassq gets both the ell_2 and the ell_infinity norm ! in one pass through the vector work(m+n+p) = xsc * scalem work(n+p) = xsc * (scalem*sqrt(temp1)) aatmax = max( aatmax, work(n+p) ) if (work(n+p) /= zero) aatmin = min(aatmin,work(n+p)) end do else do p = 1, m work(m+n+p) = scalem*abs( a(p,stdlib${ii}$_i${ri}$amax(n,a(p,1_${ik}$),lda)) ) aatmax = max( aatmax, work(m+n+p) ) aatmin = min( aatmin, work(m+n+p) ) end do end if end if ! for square matrix a try to determine whether a^t would be better ! input for the preconditioned jacobi svd, with faster convergence. ! the decision is based on an o(n) function of the vector of column ! and row norms of a, based on the shannon entropy. this should give ! the right choice in most cases when the difference actually matters. ! it may fail and pick the slower converging side. entra = zero entrat = zero if ( l2tran ) then xsc = zero temp1 = one call stdlib${ii}$_${ri}$lassq( n, sva, 1_${ik}$, xsc, temp1 ) temp1 = one / temp1 entra = zero do p = 1, n big1 = ( ( sva(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entra = entra + big1 * log(big1) end do entra = - entra / log(real(n,KIND=${rk}$)) ! now, sva().^2/trace(a^t * a) is a point in the probability simplex. ! it is derived from the diagonal of a^t * a. do the same with the ! diagonal of a * a^t, compute the entropy of the corresponding ! probability distribution. note that a * a^t and a^t * a have the ! same trace. entrat = zero do p = n+1, n+m big1 = ( ( work(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entrat = entrat + big1 * log(big1) end do entrat = - entrat / log(real(m,KIND=${rk}$)) ! analyze the entropies and decide a or a^t. smaller entropy ! usually means better input for the algorithm. transp = ( entrat < entra ) ! if a^t is better than a, transpose a. if ( transp ) then ! in an optimal implementation, this trivial transpose ! should be replaced with faster transpose. do p = 1, n - 1 do q = p + 1, n temp1 = a(q,p) a(q,p) = a(p,q) a(p,q) = temp1 end do end do do p = 1, n work(m+n+p) = sva(p) sva(p) = work(n+p) end do temp1 = aapp aapp = aatmax aatmax = temp1 temp1 = aaqq aaqq = aatmin aatmin = temp1 kill = lsvec lsvec = rsvec rsvec = kill if ( lsvec ) n1 = n rowpiv = .true. end if end if ! end if l2tran ! scale the matrix so that its maximal singular value remains less ! than sqrt(big) -- the matrix is scaled so that its maximal column ! has euclidean norm equal to sqrt(big/n). the only reason to keep ! sqrt(big) instead of big is the fact that stdlib${ii}$_${ri}$gejsv uses lapack and ! blas routines that, in some implementations, are not capable of ! working in the full interval [sfmin,big] and that they may provoke ! overflows in the intermediate results. if the singular values spread ! from sfmin to big, then stdlib${ii}$_${ri}$gesvj will compute them. so, in that case, ! one should use stdlib_${ri}$gesvj instead of stdlib${ii}$_${ri}$gejsv. big1 = sqrt( big ) temp1 = sqrt( big / real(n,KIND=${rk}$) ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, n, 1_${ik}$, sva, n, ierr ) if ( aaqq > (aapp * sfmin) ) then aaqq = ( aaqq / aapp ) * temp1 else aaqq = ( aaqq * temp1 ) / aapp end if temp1 = temp1 * scalem call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, m, n, a, lda, ierr ) ! to undo scaling at the end of this procedure, multiply the ! computed singular values with uscal2 / uscal1. uscal1 = temp1 uscal2 = aapp if ( l2kill ) then ! l2kill enforces computation of nonzero singular values in ! the restricted range of condition number of the initial a, ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). xsc = sqrt( sfmin ) else xsc = small ! now, if the condition number of a is too big, ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, ! as a precaution measure, the full svd is computed using stdlib${ii}$_${ri}$gesvj ! with accumulated jacobi rotations. this provides numerically ! more robust computation, at the cost of slightly increased run ! time. depending on the concrete implementation of blas and lapack ! (i.e. how they behave in presence of extreme ill-conditioning) the ! implementor may decide to remove this switch. if ( ( aaqq<sqrt(sfmin) ) .and. lsvec .and. rsvec ) then jracc = .true. end if end if if ( aaqq < xsc ) then do p = 1, n if ( sva(p) < xsc ) then call stdlib${ii}$_${ri}$laset( 'A', m, 1_${ik}$, zero, zero, a(1_${ik}$,p), lda ) sva(p) = zero end if end do end if ! preconditioning using qr factorization with pivoting if ( rowpiv ) then ! optional row permutation (bjoerck row pivoting): ! a result by cox and higham shows that the bjoerck's ! row pivoting combined with standard column pivoting ! has similar effect as powell-reid complete pivoting. ! the ell-infinity norms of a are made nonincreasing. do p = 1, m - 1 q = stdlib${ii}$_i${ri}$amax( m-p+1, work(m+n+p), 1_${ik}$ ) + p - 1_${ik}$ iwork(2_${ik}$*n+p) = q if ( p /= q ) then temp1 = work(m+n+p) work(m+n+p) = work(m+n+q) work(m+n+q) = temp1 end if end do call stdlib${ii}$_${ri}$laswp( n, a, lda, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), 1_${ik}$ ) end if ! end of the preparation phase (scaling, optional sorting and ! transposing, optional flushing of small columns). ! preconditioning ! if the full svd is needed, the right singular vectors are computed ! from a matrix equation, and for that we need theoretical analysis ! of the businger-golub pivoting. so we use stdlib_${ri}$geqp3 as the first rr qrf. ! in all other cases the first rr qrf can be chosen by other criteria ! (eg speed by replacing global with restricted window pivoting, such ! as in sgeqpx from toms # 782). good results will be obtained using ! sgeqpx with properly (!) chosen numerical parameters. ! any improvement of stdlib${ii}$_${ri}$geqp3 improves overall performance of stdlib${ii}$_${ri}$gejsv. ! a * p1 = q1 * [ r1^t 0]^t: do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_${ri}$geqp3( m,n,a,lda, iwork,work, work(n+1),lwork-n, ierr ) ! the upper triangular matrix r1 from the first qrf is inspected for ! rank deficiency and possibilities for deflation, or possible ! ill-conditioning. depending on the user specified flag l2rank, ! the procedure explores possibilities to reduce the numerical ! rank by inspecting the computed upper triangular factor. if ! l2rank or l2aber are up, then stdlib${ii}$_${ri}$gejsv will compute the svd of ! a + da, where ||da|| <= f(m,n)*epsln. nr = 1_${ik}$ if ( l2aber ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*epsln*||a|| are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*epsln*||a||. temp1 = sqrt(real(n,KIND=${rk}$))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) >= (temp1*abs(a(1_${ik}$,1_${ik}$))) ) then nr = nr + 1_${ik}$ else exit loop_3002 end if end do loop_3002 else if ( l2rank ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r1 is used as the criterion for ! close-to-rank-deficient. temp1 = sqrt(sfmin) loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! the goal is high relative accuracy. however, if the matrix ! has high scaled condition number the relative accuracy is in ! general not feasible. later on, a condition number estimator ! will be deployed to estimate the scaled condition number. ! here we just remove the underflowed part of the triangular ! factor. this prevents the situation in which the code is ! working hard to get the accuracy not warranted by the data. temp1 = sqrt(sfmin) loop_3302: do p = 2, n if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3302 nr = nr + 1_${ik}$ end do loop_3302 end if almort = .false. if ( nr == n ) then maxprj = one do p = 2, n temp1 = abs(a(p,p)) / sva(iwork(p)) maxprj = min( maxprj, temp1 ) end do if ( maxprj**2_${ik}$ >= one - real(n,KIND=${rk}$)*epsln ) almort = .true. end if sconda = - one condr1 = - one condr2 = - one if ( errest ) then if ( n == nr ) then if ( rsvec ) then ! V Is Available As Workspace call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, v, ldv ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_${ri}$scal( p, one/temp1, v(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$pocon( 'U', n, v, ldv, one, temp1,work(n+1), iwork(2_${ik}$*n+m+1), & ierr ) else if ( lsvec ) then ! U Is Available As Workspace call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, u, ldu ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_${ri}$scal( p, one/temp1, u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$pocon( 'U', n, u, ldu, one, temp1,work(n+1), iwork(2_${ik}$*n+m+1), & ierr ) else call stdlib${ii}$_${ri}$lacpy( 'U', n, n, a, lda, work(n+1), n ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_${ri}$scal( p, one/temp1, work(n+(p-1)*n+1), 1_${ik}$ ) end do ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths call stdlib${ii}$_${ri}$pocon( 'U', n, work(n+1), n, one, temp1,work(n+n*n+1), iwork(2_${ik}$*n+& m+1), ierr ) end if sconda = one / sqrt(temp1) ! sconda is an estimate of sqrt(||(r^t * r)^(-1)||_1). ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda else sconda = - one end if end if l2pert = l2pert .and. ( abs( a(1_${ik}$,1_${ik}$)/a(nr,nr) ) > sqrt(big1) ) ! if there is no violent scaling, artificial perturbation is not needed. ! phase 3: if ( .not. ( rsvec .or. lsvec ) ) then ! singular values only ! .. transpose a(1:nr,1:n) do p = 1, min( n-1, nr ) call stdlib${ii}$_${ri}$copy( n-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) end do ! the following two do-loops introduce small relative perturbation ! into the strict upper triangle of the lower triangular matrix. ! small entries below the main diagonal are also changed. ! this modification is useful if the computing environment does not ! provide/allow flush to zero underflow, for it prevents many ! annoying denormalized numbers in case of strongly scaled matrices. ! the perturbation is structured so that it does not introduce any ! new perturbation of the singular values, and it does not destroy ! the job done by the preconditioner. ! the licence for this perturbation is in the variable l2pert, which ! should be .false. if flush to zero underflow is active. if ( .not. almort ) then if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=${rk}$) do q = 1, nr temp1 = xsc*abs(a(q,q)) do p = 1, n if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & temp1, a(p,q) ) end do end do else call stdlib${ii}$_${ri}$laset( 'U', nr-1,nr-1, zero,zero, a(1_${ik}$,2_${ik}$),lda ) end if ! Second Preconditioning Using The Qr Factorization call stdlib${ii}$_${ri}$geqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr ) ! And Transpose Upper To Lower Triangular do p = 1, nr - 1 call stdlib${ii}$_${ri}$copy( nr-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) end do end if ! row-cyclic jacobi svd algorithm with column pivoting ! .. again some perturbation (a "background noise") is added ! to drown denormals if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=${rk}$) do q = 1, nr temp1 = xsc*abs(a(q,q)) do p = 1, nr if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = sign( & temp1, a(p,q) ) end do end do else call stdlib${ii}$_${ri}$laset( 'U', nr-1, nr-1, zero, zero, a(1_${ik}$,2_${ik}$), lda ) end if ! .. and one-sided jacobi rotations are started on a lower ! triangular matrix (plus perturbation which is ignored in ! the part which destroys triangular form (confusing?!)) call stdlib${ii}$_${ri}$gesvj( 'L', 'NOU', 'NOV', nr, nr, a, lda, sva,n, v, ldv, work, & lwork, info ) scalem = work(1_${ik}$) numrank = nint(work(2_${ik}$),KIND=${ik}$) else if ( rsvec .and. ( .not. lsvec ) ) then ! -> singular values and right singular vectors <- if ( almort ) then ! In This Case Nr Equals N do p = 1, nr call stdlib${ii}$_${ri}$copy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$laset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_${ri}$gesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,work, lwork, info ) scalem = work(1_${ik}$) numrank = nint(work(2_${ik}$),KIND=${ik}$) else ! .. two more qr factorizations ( one qrf is not enough, two require ! accumulated product of jacobi rotations, three are perfect ) call stdlib${ii}$_${ri}$laset( 'LOWER', nr-1, nr-1, zero, zero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_${ri}$gelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr) call stdlib${ii}$_${ri}$lacpy( 'LOWER', nr, nr, a, lda, v, ldv ) call stdlib${ii}$_${ri}$laset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_${ri}$geqrf( nr, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr call stdlib${ii}$_${ri}$copy( nr-p+1, v(p,p), ldv, v(p,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$laset( 'UPPER', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_${ri}$gesvj( 'LOWER', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, work(n+1), & lwork, info ) scalem = work(n+1) numrank = nint(work(n+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_${ri}$laset( 'A',n-nr, nr, zero,zero, v(nr+1,1_${ik}$), ldv ) call stdlib${ii}$_${ri}$laset( 'A',nr, n-nr, zero,zero, v(1_${ik}$,nr+1), ldv ) call stdlib${ii}$_${ri}$laset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv ) end if call stdlib${ii}$_${ri}$ormlq( 'LEFT', 'TRANSPOSE', n, n, nr, a, lda, work,v, ldv, work(n+1), & lwork-n, ierr ) end if do p = 1, n call stdlib${ii}$_${ri}$copy( n, v(p,1_${ik}$), ldv, a(iwork(p),1_${ik}$), lda ) end do call stdlib${ii}$_${ri}$lacpy( 'ALL', n, n, a, lda, v, ldv ) if ( transp ) then call stdlib${ii}$_${ri}$lacpy( 'ALL', n, n, v, ldv, u, ldu ) end if else if ( lsvec .and. ( .not. rsvec ) ) then ! Singular Values And Left Singular Vectors ! Second Preconditioning Step To Avoid Need To Accumulate ! jacobi rotations in the jacobi iterations. do p = 1, nr call stdlib${ii}$_${ri}$copy( n-p+1, a(p,p), lda, u(p,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$laset( 'UPPER', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_${ri}$geqrf( n, nr, u, ldu, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr - 1 call stdlib${ii}$_${ri}$copy( nr-p, u(p,p+1), ldu, u(p+1,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$laset( 'UPPER', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_${ri}$gesvj( 'LOWER', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, work(n+1), & lwork-n, info ) scalem = work(n+1) numrank = nint(work(n+2),KIND=${ik}$) if ( nr < m ) then call stdlib${ii}$_${ri}$laset( 'A', m-nr, nr,zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset( 'A',nr, n1-nr, zero, zero, u(1_${ik}$,nr+1), ldu ) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu ) end if end if call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) if ( rowpiv )call stdlib${ii}$_${ri}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) do p = 1, n1 xsc = one / stdlib${ii}$_${ri}$nrm2( m, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ri}$scal( m, xsc, u(1_${ik}$,p), 1_${ik}$ ) end do if ( transp ) then call stdlib${ii}$_${ri}$lacpy( 'ALL', n, n, u, ldu, v, ldv ) end if else ! Full Svd if ( .not. jracc ) then if ( .not. almort ) then ! second preconditioning step (qrf [with pivoting]) ! note that the composition of transpose, qrf and transpose is ! equivalent to an lqf call. since in many libraries the qrf ! seems to be better optimized than the lqf, we do explicit ! transpose and use the qrf. this is subject to changes in an ! optimized implementation of stdlib${ii}$_${ri}$gejsv. do p = 1, nr call stdlib${ii}$_${ri}$copy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do ! The Following Two Loops Perturb Small Entries To Avoid ! denormals in the second qr factorization, where they are ! as good as zeros. this is done to avoid painfully slow ! computation with denormals. the relative size of the perturbation ! is a parameter that can be changed by the implementer. ! this perturbation device will be obsolete on machines with ! properly implemented arithmetic. ! to switch it off, set l2pert=.false. to remove it from the ! code, remove the action under l2pert=.true., leave the else part. ! the following two loops should be blocked and fused with the ! transposed copy above. if ( l2pert ) then xsc = sqrt(small) do q = 1, nr temp1 = xsc*abs( v(q,q) ) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & sign( temp1, v(p,q) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else call stdlib${ii}$_${ri}$laset( 'U', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) end if ! estimate the row scaled condition number of r1 ! (if r1 is rectangular, n > nr, then the condition number ! of the leading nr x nr submatrix is estimated.) call stdlib${ii}$_${ri}$lacpy( 'L', nr, nr, v, ldv, work(2_${ik}$*n+1), nr ) do p = 1, nr temp1 = stdlib${ii}$_${ri}$nrm2(nr-p+1,work(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) call stdlib${ii}$_${ri}$scal(nr-p+1,one/temp1,work(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) end do call stdlib${ii}$_${ri}$pocon('LOWER',nr,work(2_${ik}$*n+1),nr,one,temp1,work(2_${ik}$*n+nr*nr+1),iwork(m+& 2_${ik}$*n+1),ierr) condr1 = one / sqrt(temp1) ! Here Need A Second Opinion On The Condition Number ! Then Assume Worst Case Scenario ! r1 is ok for inverse <=> condr1 < real(n,KIND=${rk}$) ! more conservative <=> condr1 < sqrt(real(n,KIND=${rk}$)) cond_ok = sqrt(real(nr,KIND=${rk}$)) ! [tp] cond_ok is a tuning parameter. if ( condr1 < cond_ok ) then ! .. the second qrf without pivoting. note: in an optimized ! implementation, this qrf should be implemented as the qrf ! of a lower triangular matrix. ! r1^t = q2 * r2 call stdlib${ii}$_${ri}$geqrf( n, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small)/epsln do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) end do end do end if if ( nr /= n )call stdlib${ii}$_${ri}$lacpy( 'A', n, nr, v, ldv, work(2_${ik}$*n+1), n ) ! .. save ... ! This Transposed Copy Should Be Better Than Naive do p = 1, nr - 1 call stdlib${ii}$_${ri}$copy( nr-p, v(p,p+1), ldv, v(p+1,p), 1_${ik}$ ) end do condr2 = condr1 else ! .. ill-conditioned case: second qrf with pivoting ! note that windowed pivoting would be equally good ! numerically, and more run-time efficient. so, in ! an optimal implementation, the next call to stdlib${ii}$_${ri}$geqp3 ! should be replaced with eg. call sgeqpx (acm toms #782) ! with properly (carefully) chosen parameters. ! r1^t * p2 = q2 * r2 do p = 1, nr iwork(n+p) = 0_${ik}$ end do call stdlib${ii}$_${ri}$geqp3( n, nr, v, ldv, iwork(n+1), work(n+1),work(2_${ik}$*n+1), lwork-& 2_${ik}$*n, ierr ) ! * call stdlib${ii}$_${ri}$geqrf( n, nr, v, ldv, work(n+1), work(2*n+1), ! * $ lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) if ( abs(v(q,p)) <= temp1 )v(q,p) = sign( temp1, v(q,p) ) end do end do end if call stdlib${ii}$_${ri}$lacpy( 'A', n, nr, v, ldv, work(2_${ik}$*n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q))) v(p,q) = - sign( temp1, v(q,p) ) end do end do else call stdlib${ii}$_${ri}$laset( 'L',nr-1,nr-1,zero,zero,v(2_${ik}$,1_${ik}$),ldv ) end if ! now, compute r2 = l3 * q3, the lq factorization. call stdlib${ii}$_${ri}$gelqf( nr, nr, v, ldv, work(2_${ik}$*n+n*nr+1),work(2_${ik}$*n+n*nr+nr+1), & lwork-2*n-n*nr-nr, ierr ) ! And Estimate The Condition Number call stdlib${ii}$_${ri}$lacpy( 'L',nr,nr,v,ldv,work(2_${ik}$*n+n*nr+nr+1),nr ) do p = 1, nr temp1 = stdlib${ii}$_${ri}$nrm2( p, work(2_${ik}$*n+n*nr+nr+p), nr ) call stdlib${ii}$_${ri}$scal( p, one/temp1, work(2_${ik}$*n+n*nr+nr+p), nr ) end do call stdlib${ii}$_${ri}$pocon( 'L',nr,work(2_${ik}$*n+n*nr+nr+1),nr,one,temp1,work(2_${ik}$*n+n*nr+nr+& nr*nr+1),iwork(m+2*n+1),ierr ) condr2 = one / sqrt(temp1) if ( condr2 >= cond_ok ) then ! Save The Householder Vectors Used For Q3 ! (this overwrites the copy of r2, as it will not be ! needed in this branch, but it does not overwritte the ! huseholder vectors of q2.). call stdlib${ii}$_${ri}$lacpy( 'U', nr, nr, v, ldv, work(2_${ik}$*n+1), n ) ! And The Rest Of The Information On Q3 Is In ! work(2*n+n*nr+1:2*n+n*nr+n) end if end if if ( l2pert ) then xsc = sqrt(small) do q = 2, nr temp1 = xsc * v(q,q) do p = 1, q - 1 ! v(p,q) = - sign( temp1, v(q,p) ) v(p,q) = - sign( temp1, v(p,q) ) end do end do else call stdlib${ii}$_${ri}$laset( 'U', nr-1,nr-1, zero,zero, v(1_${ik}$,2_${ik}$), ldv ) end if ! second preconditioning finished; continue with jacobi svd ! the input matrix is lower trinagular. ! recover the right singular vectors as solution of a well ! conditioned triangular matrix equation. if ( condr1 < cond_ok ) then call stdlib${ii}$_${ri}$gesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,ldu,work(2_${ik}$*n+n*nr+nr+1),& lwork-2*n-n*nr-nr,info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_${ri}$copy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ri}$scal( nr, sva(p), v(1_${ik}$,p), 1_${ik}$ ) end do ! Pick The Right Matrix Equation And Solve It if ( nr == n ) then ! :)) .. best case, r1 is inverted. the solution of this matrix ! equation is q2*v2 = the product of the jacobi rotations ! used in stdlib${ii}$_${ri}$gesvj, premultiplied with the orthogonal matrix ! from the second qr factorization. call stdlib${ii}$_${ri}$trsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv ) else ! .. r1 is well conditioned, but non-square. transpose(r2) ! is inverted to get the product of the jacobi rotations ! used in stdlib${ii}$_${ri}$gesvj. the q-factor from the second qr ! factorization is then built in explicitly. call stdlib${ii}$_${ri}$trsm('L','U','T','N',nr,nr,one,work(2_${ik}$*n+1),n,v,ldv) if ( nr < n ) then call stdlib${ii}$_${ri}$laset('A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ri}$laset('A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ri}$laset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_${ri}$ormqr('L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) end if else if ( condr2 < cond_ok ) then ! :) .. the input matrix a is very likely a relative of ! the kahan matrix :) ! the matrix r2 is inverted. the solution of the matrix equation ! is q3^t*v3 = the product of the jacobi rotations (appplied to ! the lower triangular l3 from the lq factorization of ! r2=l3*q3), pre-multiplied with the transposed q3. call stdlib${ii}$_${ri}$gesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, work(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr, info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_${ri}$copy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ri}$scal( nr, sva(p), u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$trsm('L','U','N','N',nr,nr,one,work(2_${ik}$*n+1),n,u,ldu) ! Apply The Permutation From The Second Qr Factorization do q = 1, nr do p = 1, nr work(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do end do if ( nr < n ) then call stdlib${ii}$_${ri}$laset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_${ri}$laset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_${ri}$laset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_${ri}$ormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) else ! last line of defense. ! #:( this is a rather pathological case: no scaled condition ! improvement after two pivoted qr factorizations. other ! possibility is that the rank revealing qr factorization ! or the condition estimator has failed, or the cond_ok ! is set very close to one (which is unnecessary). normally, ! this branch should never be executed, but in rare cases of ! failure of the rrqr or condition estimator, the last line of ! defense ensures that stdlib${ii}$_${ri}$gejsv completes the task. ! compute the full svd of l3 using stdlib${ii}$_${ri}$gesvj with explicit ! accumulation of jacobi rotations. call stdlib${ii}$_${ri}$gesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, work(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr, info ) scalem = work(2_${ik}$*n+n*nr+nr+1) numrank = nint(work(2_${ik}$*n+n*nr+nr+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_${ri}$laset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_${ri}$laset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_${ri}$laset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_${ri}$ormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) call stdlib${ii}$_${ri}$ormlq( 'L', 'T', nr, nr, nr, work(2_${ik}$*n+1), n,work(2_${ik}$*n+n*nr+1), u, & ldu, work(2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) do q = 1, nr do p = 1, nr work(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do end do end if ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=${rk}$)) * epsln do q = 1, n do p = 1, n work(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_${ri}$nrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ri}$scal( n, xsc, & v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_${ri}$laset( 'A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset('A',nr,n1-nr,zero,zero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ri}$laset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu) end if end if ! the q matrix from the first qrf is built into the left singular ! matrix u. this applies to all cases. call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'NO_TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) ! the columns of u are normalized. the cost is o(m*n) flops. temp1 = sqrt(real(m,KIND=${rk}$)) * epsln do p = 1, nr xsc = one / stdlib${ii}$_${ri}$nrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ri}$scal( m, xsc, & u(1_${ik}$,p), 1_${ik}$ ) end do ! if the initial qrf is computed with row pivoting, the left ! singular vectors must be adjusted. if ( rowpiv )call stdlib${ii}$_${ri}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) else ! The Initial Matrix A Has Almost Orthogonal Columns And ! the second qrf is not needed call stdlib${ii}$_${ri}$lacpy( 'UPPER', n, n, a, lda, work(n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, n temp1 = xsc * work( n + (p-1)*n + p ) do q = 1, p - 1 work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q)) end do end do else call stdlib${ii}$_${ri}$laset( 'LOWER',n-1,n-1,zero,zero,work(n+2),n ) end if call stdlib${ii}$_${ri}$gesvj( 'UPPER', 'U', 'N', n, n, work(n+1), n, sva,n, u, ldu, work(n+& n*n+1), lwork-n-n*n, info ) scalem = work(n+n*n+1) numrank = nint(work(n+n*n+2),KIND=${ik}$) do p = 1, n call stdlib${ii}$_${ri}$copy( n, work(n+(p-1)*n+1), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ri}$scal( n, sva(p), work(n+(p-1)*n+1), 1_${ik}$ ) end do call stdlib${ii}$_${ri}$trsm( 'LEFT', 'UPPER', 'NOTRANS', 'NO UD', n, n,one, a, lda, work(n+& 1_${ik}$), n ) do p = 1, n call stdlib${ii}$_${ri}$copy( n, work(n+p), n, v(iwork(p),1_${ik}$), ldv ) end do temp1 = sqrt(real(n,KIND=${rk}$))*epsln do p = 1, n xsc = one / stdlib${ii}$_${ri}$nrm2( n, v(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ri}$scal( n, xsc, & v(1_${ik}$,p), 1_${ik}$ ) end do ! assemble the left singular vector matrix u (m x n). if ( n < m ) then call stdlib${ii}$_${ri}$laset( 'A', m-n, n, zero, zero, u(n+1,1_${ik}$), ldu ) if ( n < n1 ) then call stdlib${ii}$_${ri}$laset( 'A',n, n1-n, zero, zero, u(1_${ik}$,n+1),ldu ) call stdlib${ii}$_${ri}$laset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu ) end if end if call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) temp1 = sqrt(real(m,KIND=${rk}$))*epsln do p = 1, n1 xsc = one / stdlib${ii}$_${ri}$nrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ri}$scal( m, xsc, & u(1_${ik}$,p), 1_${ik}$ ) end do if ( rowpiv )call stdlib${ii}$_${ri}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) end if ! end of the >> almost orthogonal case << in the full svd else ! this branch deploys a preconditioned jacobi svd with explicitly ! accumulated rotations. it is included as optional, mainly for ! experimental purposes. it does perform well, and can also be used. ! in this implementation, this branch will be automatically activated ! if the condition number sigma_max(a) / sigma_min(a) is predicted ! to be greater than the overflow threshold. this is because the ! a posteriori computation of the singular vectors assumes robust ! implementation of blas and some lapack procedures, capable of working ! in presence of extreme values. since that is not always the case, ... do p = 1, nr call stdlib${ii}$_${ri}$copy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 1, nr temp1 = xsc*abs( v(q,q) ) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = sign(& temp1, v(p,q) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else call stdlib${ii}$_${ri}$laset( 'U', nr-1, nr-1, zero, zero, v(1_${ik}$,2_${ik}$), ldv ) end if call stdlib${ii}$_${ri}$geqrf( n, nr, v, ldv, work(n+1), work(2_${ik}$*n+1),lwork-2*n, ierr ) call stdlib${ii}$_${ri}$lacpy( 'L', n, nr, v, ldv, work(2_${ik}$*n+1), n ) do p = 1, nr call stdlib${ii}$_${ri}$copy( nr-p+1, v(p,p), ldv, u(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 2, nr do p = 1, q - 1 temp1 = xsc * min(abs(u(p,p)),abs(u(q,q))) u(p,q) = - sign( temp1, u(q,p) ) end do end do else call stdlib${ii}$_${ri}$laset('U', nr-1, nr-1, zero, zero, u(1_${ik}$,2_${ik}$), ldu ) end if call stdlib${ii}$_${ri}$gesvj( 'G', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, work(2_${ik}$*n+n*nr+1), & lwork-2*n-n*nr, info ) scalem = work(2_${ik}$*n+n*nr+1) numrank = nint(work(2_${ik}$*n+n*nr+2),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_${ri}$laset( 'A',n-nr,nr,zero,zero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_${ri}$laset( 'A',nr,n-nr,zero,zero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_${ri}$laset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_${ri}$ormqr( 'L','N',n,n,nr,work(2_${ik}$*n+1),n,work(n+1),v,ldv,work(2_${ik}$*n+n*nr+nr+1)& ,lwork-2*n-n*nr-nr,ierr ) ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=${rk}$)) * epsln do q = 1, n do p = 1, n work(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = work(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_${ri}$nrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ri}$scal( n, xsc, & v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_${ri}$laset( 'A', m-nr, nr, zero, zero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_${ri}$laset( 'A',nr, n1-nr, zero, zero, u(1_${ik}$,nr+1),ldu ) call stdlib${ii}$_${ri}$laset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu ) end if end if call stdlib${ii}$_${ri}$ormqr( 'LEFT', 'NO TR', m, n1, n, a, lda, work, u,ldu, work(n+1), & lwork-n, ierr ) if ( rowpiv )call stdlib${ii}$_${ri}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(2_${ik}$*n+1), -1_${ik}$ ) end if if ( transp ) then ! .. swap u and v because the procedure worked on a^t do p = 1, n call stdlib${ii}$_${ri}$swap( n, u(1_${ik}$,p), 1_${ik}$, v(1_${ik}$,p), 1_${ik}$ ) end do end if end if ! end of the full svd ! undo scaling, if necessary (and possible) if ( uscal2 <= (big/sva(1_${ik}$))*uscal1 ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, uscal1, uscal2, nr, 1_${ik}$, sva, n, ierr ) uscal1 = one uscal2 = one end if if ( nr < n ) then do p = nr+1, n sva(p) = zero end do end if work(1_${ik}$) = uscal2 * scalem work(2_${ik}$) = uscal1 if ( errest ) work(3_${ik}$) = sconda if ( lsvec .and. rsvec ) then work(4_${ik}$) = condr1 work(5_${ik}$) = condr2 end if if ( l2tran ) then work(6_${ik}$) = entra work(7_${ik}$) = entrat end if iwork(1_${ik}$) = nr iwork(2_${ik}$) = numrank iwork(3_${ik}$) = warning return end subroutine stdlib${ii}$_${ri}$gejsv #:endif #:endfor pure module subroutine stdlib${ii}$_cgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & !! CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N !! matrix [A], where M >= N. The SVD of [A] is written as !! [A] = [U] * [SIGMA] * [V]^*, !! where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N !! diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and !! [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are !! the singular values of [A]. The columns of [U] and [V] are the left and !! the right singular vectors of [A], respectively. The matrices [U] and [V] !! are computed and stored in the arrays U and V, respectively. The diagonal !! of [SIGMA] is computed and stored in the array SVA. v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldv, lwork, lrwork, m, n ! Array Arguments complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: u(ldu,*), v(ldv,*), cwork(lwork) real(sp), intent(out) :: sva(n), rwork(lrwork) integer(${ik}$), intent(out) :: iwork(*) character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv ! =========================================================================== ! Local Scalars complex(sp) :: ctemp real(sp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc integer(${ik}$) :: ierr, n1, nr, numrank, p, q, warning logical(lk) :: almort, defr, errest, goscal, jracc, kill, lquery, lsvec, l2aber, & l2kill, l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp integer(${ik}$) :: optwrk, minwrk, minrwrk, miniwrk integer(${ik}$) :: lwcon, lwlqf, lwqp3, lwqrf, lwunmlq, lwunmqr, lwunmqrm, lwsvdj, & lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff integer(${ik}$) :: lwrk_cgelqf, lwrk_cgeqp3, lwrk_cgeqp3n, lwrk_cgeqrf, lwrk_cgesvj, & lwrk_cgesvjv, lwrk_cgesvju, lwrk_cunmlq, lwrk_cunmqr, lwrk_cunmqrm ! Local Arrays complex(sp) :: cdummy(1_${ik}$) real(sp) :: rdummy(1_${ik}$) ! Intrinsic Functions ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) jracc = stdlib_lsame( jobv, 'J' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) l2rank = stdlib_lsame( joba, 'R' ) l2aber = stdlib_lsame( joba, 'A' ) errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) l2tran = stdlib_lsame( jobt, 'T' ) .and. ( m == n ) l2kill = stdlib_lsame( jobr, 'R' ) defr = stdlib_lsame( jobr, 'N' ) l2pert = stdlib_lsame( jobp, 'P' ) lquery = ( lwork == -1_${ik}$ ) .or. ( lrwork == -1_${ik}$ ) if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & then info = - 1_${ik}$ else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.( stdlib_lsame( jobu, 'W' ) & .and. rsvec .and. l2tran ) ) ) then info = - 2_${ik}$ else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.( stdlib_lsame( jobv, 'W' ) & .and. lsvec .and. l2tran ) ) ) then info = - 3_${ik}$ else if ( .not. ( l2kill .or. defr ) ) then info = - 4_${ik}$ else if ( .not. ( stdlib_lsame(jobt,'T') .or. stdlib_lsame(jobt,'N') ) ) then info = - 5_${ik}$ else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then info = - 6_${ik}$ else if ( m < 0_${ik}$ ) then info = - 7_${ik}$ else if ( ( n < 0_${ik}$ ) .or. ( n > m ) ) then info = - 8_${ik}$ else if ( lda < m ) then info = - 10_${ik}$ else if ( lsvec .and. ( ldu < m ) ) then info = - 13_${ik}$ else if ( rsvec .and. ( ldv < n ) ) then info = - 15_${ik}$ else ! #:) info = 0_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lcwork, lrwork are written with a lot of redundancy and ! can be simplified. however, this verbose form is useful for ! maintenance and modifications of the code.]] ! .. minimal workspace length for stdlib${ii}$_cgeqp3 of an m x n matrix, ! stdlib${ii}$_cgeqrf of an n x n matrix, stdlib${ii}$_cgelqf of an n x n matrix, ! stdlib${ii}$_cunmlq for computing n x n matrix, stdlib${ii}$_cunmqr for computing n x n ! matrix, stdlib${ii}$_cunmqr for computing m x n matrix, respectively. lwqp3 = n+1 lwqrf = max( 1_${ik}$, n ) lwlqf = max( 1_${ik}$, n ) lwunmlq = max( 1_${ik}$, n ) lwunmqr = max( 1_${ik}$, n ) lwunmqrm = max( 1_${ik}$, m ) ! Minimal Workspace Length For Stdlib_Cpocon Of An N X N Matrix lwcon = 2_${ik}$ * n ! .. minimal workspace length for stdlib${ii}$_cgesvj of an n x n matrix, ! without and with explicit accumulation of jacobi rotations lwsvdj = max( 2_${ik}$ * n, 1_${ik}$ ) lwsvdjv = max( 2_${ik}$ * n, 1_${ik}$ ) ! .. minimal real workspace length for stdlib${ii}$_cgeqp3, stdlib${ii}$_cpocon, stdlib${ii}$_cgesvj lrwqp3 = 2_${ik}$ * n lrwcon = n lrwsvdj = n if ( lquery ) then call stdlib${ii}$_cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1_${ik}$,rdummy, ierr ) lwrk_cgeqp3 = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cgeqrf( n, n, a, lda, cdummy, cdummy,-1_${ik}$, ierr ) lwrk_cgeqrf = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cgelqf( n, n, a, lda, cdummy, cdummy,-1_${ik}$, ierr ) lwrk_cgelqf = real( cdummy(1_${ik}$),KIND=sp) end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ miniwrk = n if ( .not. (lsvec .or. rsvec ) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If ! only the singular values are requested if ( errest ) then minwrk = max( n+lwqp3, n**2_${ik}$+lwcon, n+lwqrf, lwsvdj ) else minwrk = max( n+lwqp3, n+lwqrf, lwsvdj ) end if if ( lquery ) then call stdlib${ii}$_cgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,ldv, cdummy, -1_${ik}$,& rdummy, -1_${ik}$, ierr ) lwrk_cgesvj = real( cdummy(1_${ik}$),KIND=sp) if ( errest ) then optwrk = max( n+lwrk_cgeqp3, n**2_${ik}$+lwcon,n+lwrk_cgeqrf, lwrk_cgesvj ) else optwrk = max( n+lwrk_cgeqp3, n+lwrk_cgeqrf,lwrk_cgesvj ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwcon, lrwsvdj ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwcon, lrwsvdj ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the right singular vectors are requested if ( errest ) then minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,2_${ik}$*n+lwqrf, n+lwsvdj, n+& lwunmlq ) else minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2_${ik}$*n+lwqrf,n+lwsvdj, n+lwunmlq ) end if if ( lquery ) then call stdlib${ii}$_cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1_${ik}$, & rdummy, -1_${ik}$, ierr ) lwrk_cgesvj = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -1_${ik}$, & ierr ) lwrk_cunmlq = real( cdummy(1_${ik}$),KIND=sp) if ( errest ) then optwrk = max( n+lwrk_cgeqp3, lwcon, lwrk_cgesvj,n+lwrk_cgelqf, 2_${ik}$*n+& lwrk_cgeqrf,n+lwrk_cgesvj, n+lwrk_cunmlq ) else optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvj,n+lwrk_cgelqf,2_${ik}$*n+lwrk_cgeqrf, n+& lwrk_cgesvj,n+lwrk_cunmlq ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the left singular vectors are requested if ( errest ) then minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm ) else minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm ) end if if ( lquery ) then call stdlib${ii}$_cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1_${ik}$, & rdummy, -1_${ik}$, ierr ) lwrk_cgesvj = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_cunmqrm = real( cdummy(1_${ik}$),KIND=sp) if ( errest ) then optwrk = n + max( lwrk_cgeqp3, lwcon, n+lwrk_cgeqrf,lwrk_cgesvj, & lwrk_cunmqrm ) else optwrk = n + max( lwrk_cgeqp3, n+lwrk_cgeqrf,lwrk_cgesvj, lwrk_cunmqrm ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else ! Minimal And Optimal Sizes Of The Complex Workspace If The ! full svd is requested if ( .not. jracc ) then if ( errest ) then minwrk = max( n+lwqp3, n+lwcon, 2_${ik}$*n+n**2_${ik}$+lwcon,2_${ik}$*n+lwqrf, 2_${ik}$*n+& lwqp3,2_${ik}$*n+n**2_${ik}$+n+lwlqf, 2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+lwsvdj, 2_${ik}$*n+& n**2_${ik}$+n+lwsvdjv,2_${ik}$*n+n**2_${ik}$+n+lwunmqr,2_${ik}$*n+n**2_${ik}$+n+lwunmlq,n+n**2_${ik}$+lwsvdj, n+& lwunmqrm ) else minwrk = max( n+lwqp3, 2_${ik}$*n+n**2_${ik}$+lwcon,2_${ik}$*n+lwqrf, 2_${ik}$*n+& lwqp3,2_${ik}$*n+n**2_${ik}$+n+lwlqf, 2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+lwsvdj, 2_${ik}$*n+& n**2_${ik}$+n+lwsvdjv,2_${ik}$*n+n**2_${ik}$+n+lwunmqr,2_${ik}$*n+n**2_${ik}$+n+lwunmlq,n+n**2_${ik}$+lwsvdj, & n+lwunmqrm ) end if miniwrk = miniwrk + n if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( errest ) then minwrk = max( n+lwqp3, n+lwcon, 2_${ik}$*n+lwqrf,2_${ik}$*n+n**2_${ik}$+lwsvdjv, 2_${ik}$*n+n**2_${ik}$+n+& lwunmqr,n+lwunmqrm ) else minwrk = max( n+lwqp3, 2_${ik}$*n+lwqrf,2_${ik}$*n+n**2_${ik}$+lwsvdjv, 2_${ik}$*n+n**2_${ik}$+n+lwunmqr,n+& lwunmqrm ) end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m end if if ( lquery ) then call stdlib${ii}$_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_cunmqrm = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_cunmqr = real( cdummy(1_${ik}$),KIND=sp) if ( .not. jracc ) then call stdlib${ii}$_cgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1_${ik}$,rdummy, ierr ) lwrk_cgeqp3n = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_cgesvj = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_cgesvju = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_cgesvjv = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -& 1_${ik}$, ierr ) lwrk_cunmlq = real( cdummy(1_${ik}$),KIND=sp) if ( errest ) then optwrk = max( n+lwrk_cgeqp3, n+lwcon,2_${ik}$*n+n**2_${ik}$+lwcon, 2_${ik}$*n+lwrk_cgeqrf,& 2_${ik}$*n+lwrk_cgeqp3n,2_${ik}$*n+n**2_${ik}$+n+lwrk_cgelqf,2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+& n**2_${ik}$+n+lwrk_cgesvj,2_${ik}$*n+n**2_${ik}$+n+lwrk_cgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_cunmqr,2_${ik}$*n+& n**2_${ik}$+n+lwrk_cunmlq,n+n**2_${ik}$+lwrk_cgesvju,n+lwrk_cunmqrm ) else optwrk = max( n+lwrk_cgeqp3,2_${ik}$*n+n**2_${ik}$+lwcon, 2_${ik}$*n+lwrk_cgeqrf,2_${ik}$*n+& lwrk_cgeqp3n,2_${ik}$*n+n**2_${ik}$+n+lwrk_cgelqf,2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+& lwrk_cgesvj,2_${ik}$*n+n**2_${ik}$+n+lwrk_cgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_cunmqr,2_${ik}$*n+n**2_${ik}$+n+& lwrk_cunmlq,n+n**2_${ik}$+lwrk_cgesvju,n+lwrk_cunmqrm ) end if else call stdlib${ii}$_cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_cgesvjv = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,v, ldv, cdummy,& -1_${ik}$, ierr ) lwrk_cunmqr = real( cdummy(1_${ik}$),KIND=sp) call stdlib${ii}$_cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -& 1_${ik}$, ierr ) lwrk_cunmqrm = real( cdummy(1_${ik}$),KIND=sp) if ( errest ) then optwrk = max( n+lwrk_cgeqp3, n+lwcon,2_${ik}$*n+lwrk_cgeqrf, 2_${ik}$*n+n**2_${ik}$,2_${ik}$*n+& n**2_${ik}$+lwrk_cgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_cunmqr,n+lwrk_cunmqrm ) else optwrk = max( n+lwrk_cgeqp3, 2_${ik}$*n+lwrk_cgeqrf,2_${ik}$*n+n**2_${ik}$, 2_${ik}$*n+n**2_${ik}$+& lwrk_cgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_cunmqr,n+lwrk_cunmqrm ) end if end if end if if ( l2tran .or. rowpiv ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( optwrk, minwrk ) if ( lwork < minwrk .and. (.not.lquery) ) info = - 17_${ik}$ if ( lrwork < minrwrk .and. (.not.lquery) ) info = - 19_${ik}$ end if if ( info /= 0_${ik}$ ) then ! #:( call stdlib${ii}$_xerbla( 'CGEJSV', - info ) return else if ( lquery ) then cwork(1_${ik}$) = optwrk cwork(2_${ik}$) = minwrk rwork(1_${ik}$) = minrwrk iwork(1_${ik}$) = max( 4_${ik}$, miniwrk ) return end if ! quick return for void matrix (y3k safe) ! #:) if ( ( m == 0_${ik}$ ) .or. ( n == 0_${ik}$ ) ) then iwork(1_${ik}$:4_${ik}$) = 0_${ik}$ rwork(1_${ik}$:7_${ik}$) = 0_${ik}$ return endif ! determine whether the matrix u should be m x n or m x m if ( lsvec ) then n1 = n if ( stdlib_lsame( jobu, 'F' ) ) n1 = m end if ! set numerical parameters ! ! note: make sure stdlib${ii}$_slamch() does not fail on the target architecture. epsln = stdlib${ii}$_slamch('EPSILON') sfmin = stdlib${ii}$_slamch('SAFEMINIMUM') small = sfmin / epsln big = stdlib${ii}$_slamch('O') ! big = one / sfmin ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n ! (!) if necessary, scale sva() to protect the largest norm from ! overflow. it is possible that this scaling pushes the smallest ! column norm left from the underflow threshold (extreme case). scalem = one / sqrt(real(m,KIND=sp)*real(n,KIND=sp)) noscal = .true. goscal = .true. do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_classq( m, a(1_${ik}$,p), 1_${ik}$, aapp, aaqq ) if ( aapp > big ) then info = - 9_${ik}$ call stdlib${ii}$_xerbla( 'CGEJSV', -info ) return end if aaqq = sqrt(aaqq) if ( ( aapp < (big / aaqq) ) .and. noscal ) then sva(p) = aapp * aaqq else noscal = .false. sva(p) = aapp * ( aaqq * scalem ) if ( goscal ) then goscal = .false. call stdlib${ii}$_sscal( p-1, scalem, sva, 1_${ik}$ ) end if end if end do if ( noscal ) scalem = one aapp = zero aaqq = big do p = 1, n aapp = max( aapp, sva(p) ) if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) end do ! quick return for zero m x n matrix ! #:) if ( aapp == zero ) then if ( lsvec ) call stdlib${ii}$_claset( 'G', m, n1, czero, cone, u, ldu ) if ( rsvec ) call stdlib${ii}$_claset( 'G', n, n, czero, cone, v, ldv ) rwork(1_${ik}$) = one rwork(2_${ik}$) = one if ( errest ) rwork(3_${ik}$) = one if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = one rwork(5_${ik}$) = one end if if ( l2tran ) then rwork(6_${ik}$) = zero rwork(7_${ik}$) = zero end if iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ iwork(3_${ik}$) = 0_${ik}$ iwork(4_${ik}$) = -1_${ik}$ return end if ! issue warning if denormalized column norms detected. override the ! high relative accuracy request. issue licence to kill nonzero columns ! (set them to zero) whose norm is less than sigma_max / big (roughly). ! #:( warning = 0_${ik}$ if ( aaqq <= sfmin ) then l2rank = .true. l2kill = .true. warning = 1_${ik}$ end if ! quick return for one-column matrix ! #:) if ( n == 1_${ik}$ ) then if ( lsvec ) then call stdlib${ii}$_clascl( 'G',0_${ik}$,0_${ik}$,sva(1_${ik}$),scalem, m,1_${ik}$,a(1_${ik}$,1_${ik}$),lda,ierr ) call stdlib${ii}$_clacpy( 'A', m, 1_${ik}$, a, lda, u, ldu ) ! computing all m left singular vectors of the m x 1 matrix if ( n1 /= n ) then call stdlib${ii}$_cgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr ) call stdlib${ii}$_cungqr( m,n1,1_${ik}$, u,ldu,cwork,cwork(n+1),lwork-n,ierr ) call stdlib${ii}$_ccopy( m, a(1_${ik}$,1_${ik}$), 1_${ik}$, u(1_${ik}$,1_${ik}$), 1_${ik}$ ) end if end if if ( rsvec ) then v(1_${ik}$,1_${ik}$) = cone end if if ( sva(1_${ik}$) < (big*scalem) ) then sva(1_${ik}$) = sva(1_${ik}$) / scalem scalem = one end if rwork(1_${ik}$) = one / scalem rwork(2_${ik}$) = one if ( sva(1_${ik}$) /= zero ) then iwork(1_${ik}$) = 1_${ik}$ if ( ( sva(1_${ik}$) / scalem) >= sfmin ) then iwork(2_${ik}$) = 1_${ik}$ else iwork(2_${ik}$) = 0_${ik}$ end if else iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ end if iwork(3_${ik}$) = 0_${ik}$ iwork(4_${ik}$) = -1_${ik}$ if ( errest ) rwork(3_${ik}$) = one if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = one rwork(5_${ik}$) = one end if if ( l2tran ) then rwork(6_${ik}$) = zero rwork(7_${ik}$) = zero end if return end if transp = .false. aatmax = -one aatmin = big if ( rowpiv .or. l2tran ) then ! compute the row norms, needed to determine row pivoting sequence ! (in the case of heavily row weighted a, row pivoting is strongly ! advised) and to collect information needed to compare the ! structures of a * a^* and a^* * a (in the case l2tran==.true.). if ( l2tran ) then do p = 1, m xsc = zero temp1 = one call stdlib${ii}$_classq( n, a(p,1_${ik}$), lda, xsc, temp1 ) ! stdlib${ii}$_classq gets both the ell_2 and the ell_infinity norm ! in one pass through the vector rwork(m+p) = xsc * scalem rwork(p) = xsc * (scalem*sqrt(temp1)) aatmax = max( aatmax, rwork(p) ) if (rwork(p) /= zero)aatmin = min(aatmin,rwork(p)) end do else do p = 1, m rwork(m+p) = scalem*abs( a(p,stdlib${ii}$_icamax(n,a(p,1_${ik}$),lda)) ) aatmax = max( aatmax, rwork(m+p) ) aatmin = min( aatmin, rwork(m+p) ) end do end if end if ! for square matrix a try to determine whether a^* would be better ! input for the preconditioned jacobi svd, with faster convergence. ! the decision is based on an o(n) function of the vector of column ! and row norms of a, based on the shannon entropy. this should give ! the right choice in most cases when the difference actually matters. ! it may fail and pick the slower converging side. entra = zero entrat = zero if ( l2tran ) then xsc = zero temp1 = one call stdlib${ii}$_slassq( n, sva, 1_${ik}$, xsc, temp1 ) temp1 = one / temp1 entra = zero do p = 1, n big1 = ( ( sva(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entra = entra + big1 * log(big1) end do entra = - entra / log(real(n,KIND=sp)) ! now, sva().^2/trace(a^* * a) is a point in the probability simplex. ! it is derived from the diagonal of a^* * a. do the same with the ! diagonal of a * a^*, compute the entropy of the corresponding ! probability distribution. note that a * a^* and a^* * a have the ! same trace. entrat = zero do p = 1, m big1 = ( ( rwork(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entrat = entrat + big1 * log(big1) end do entrat = - entrat / log(real(m,KIND=sp)) ! analyze the entropies and decide a or a^*. smaller entropy ! usually means better input for the algorithm. transp = ( entrat < entra ) ! if a^* is better than a, take the adjoint of a. this is allowed ! only for square matrices, m=n. if ( transp ) then ! in an optimal implementation, this trivial transpose ! should be replaced with faster transpose. do p = 1, n - 1 a(p,p) = conjg(a(p,p)) do q = p + 1, n ctemp = conjg(a(q,p)) a(q,p) = conjg(a(p,q)) a(p,q) = ctemp end do end do a(n,n) = conjg(a(n,n)) do p = 1, n rwork(m+p) = sva(p) sva(p) = rwork(p) ! previously computed row 2-norms are now column 2-norms ! of the transposed matrix end do temp1 = aapp aapp = aatmax aatmax = temp1 temp1 = aaqq aaqq = aatmin aatmin = temp1 kill = lsvec lsvec = rsvec rsvec = kill if ( lsvec ) n1 = n rowpiv = .true. end if end if ! end if l2tran ! scale the matrix so that its maximal singular value remains less ! than sqrt(big) -- the matrix is scaled so that its maximal column ! has euclidean norm equal to sqrt(big/n). the only reason to keep ! sqrt(big) instead of big is the fact that stdlib${ii}$_cgejsv uses lapack and ! blas routines that, in some implementations, are not capable of ! working in the full interval [sfmin,big] and that they may provoke ! overflows in the intermediate results. if the singular values spread ! from sfmin to big, then stdlib${ii}$_cgesvj will compute them. so, in that case, ! one should use stdlib_cgesvj instead of stdlib${ii}$_cgejsv. big1 = sqrt( big ) temp1 = sqrt( big / real(n,KIND=sp) ) ! >> for future updates: allow bigger range, i.e. the largest column ! will be allowed up to big/n and stdlib${ii}$_cgesvj will do the rest. however, for ! this all other (lapack) components must allow such a range. ! temp1 = big/real(n,KIND=sp) ! temp1 = big * epsln this should 'almost' work with current lapack components call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, n, 1_${ik}$, sva, n, ierr ) if ( aaqq > (aapp * sfmin) ) then aaqq = ( aaqq / aapp ) * temp1 else aaqq = ( aaqq * temp1 ) / aapp end if temp1 = temp1 * scalem call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, m, n, a, lda, ierr ) ! to undo scaling at the end of this procedure, multiply the ! computed singular values with uscal2 / uscal1. uscal1 = temp1 uscal2 = aapp if ( l2kill ) then ! l2kill enforces computation of nonzero singular values in ! the restricted range of condition number of the initial a, ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). xsc = sqrt( sfmin ) else xsc = small ! now, if the condition number of a is too big, ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, ! as a precaution measure, the full svd is computed using stdlib${ii}$_cgesvj ! with accumulated jacobi rotations. this provides numerically ! more robust computation, at the cost of slightly increased run ! time. depending on the concrete implementation of blas and lapack ! (i.e. how they behave in presence of extreme ill-conditioning) the ! implementor may decide to remove this switch. if ( ( aaqq<sqrt(sfmin) ) .and. lsvec .and. rsvec ) then jracc = .true. end if end if if ( aaqq < xsc ) then do p = 1, n if ( sva(p) < xsc ) then call stdlib${ii}$_claset( 'A', m, 1_${ik}$, czero, czero, a(1_${ik}$,p), lda ) sva(p) = zero end if end do end if ! preconditioning using qr factorization with pivoting if ( rowpiv ) then ! optional row permutation (bjoerck row pivoting): ! a result by cox and higham shows that the bjoerck's ! row pivoting combined with standard column pivoting ! has similar effect as powell-reid complete pivoting. ! the ell-infinity norms of a are made nonincreasing. if ( ( lsvec .and. rsvec ) .and. .not.( jracc ) ) then iwoff = 2_${ik}$*n else iwoff = n end if do p = 1, m - 1 q = stdlib${ii}$_isamax( m-p+1, rwork(m+p), 1_${ik}$ ) + p - 1_${ik}$ iwork(iwoff+p) = q if ( p /= q ) then temp1 = rwork(m+p) rwork(m+p) = rwork(m+q) rwork(m+q) = temp1 end if end do call stdlib${ii}$_claswp( n, a, lda, 1_${ik}$, m-1, iwork(iwoff+1), 1_${ik}$ ) end if ! end of the preparation phase (scaling, optional sorting and ! transposing, optional flushing of small columns). ! preconditioning ! if the full svd is needed, the right singular vectors are computed ! from a matrix equation, and for that we need theoretical analysis ! of the businger-golub pivoting. so we use stdlib_cgeqp3 as the first rr qrf. ! in all other cases the first rr qrf can be chosen by other criteria ! (eg speed by replacing global with restricted window pivoting, such ! as in xgeqpx from toms # 782). good results will be obtained using ! xgeqpx with properly (!) chosen numerical parameters. ! any improvement of stdlib${ii}$_cgeqp3 improves overall performance of stdlib${ii}$_cgejsv. ! a * p1 = q1 * [ r1^* 0]^*: do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,rwork, ierr ) ! the upper triangular matrix r1 from the first qrf is inspected for ! rank deficiency and possibilities for deflation, or possible ! ill-conditioning. depending on the user specified flag l2rank, ! the procedure explores possibilities to reduce the numerical ! rank by inspecting the computed upper triangular factor. if ! l2rank or l2aber are up, then stdlib${ii}$_cgejsv will compute the svd of ! a + da, where ||da|| <= f(m,n)*epsln. nr = 1_${ik}$ if ( l2aber ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*epsln*||a|| are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*epsln*||a||. temp1 = sqrt(real(n,KIND=sp))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) >= (temp1*abs(a(1_${ik}$,1_${ik}$))) ) then nr = nr + 1_${ik}$ else exit loop_3002 end if end do loop_3002 else if ( l2rank ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r1 is used as the criterion for ! close-to-rank-deficient. temp1 = sqrt(sfmin) loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! the goal is high relative accuracy. however, if the matrix ! has high scaled condition number the relative accuracy is in ! general not feasible. later on, a condition number estimator ! will be deployed to estimate the scaled condition number. ! here we just remove the underflowed part of the triangular ! factor. this prevents the situation in which the code is ! working hard to get the accuracy not warranted by the data. temp1 = sqrt(sfmin) loop_3302: do p = 2, n if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3302 nr = nr + 1_${ik}$ end do loop_3302 end if almort = .false. if ( nr == n ) then maxprj = one do p = 2, n temp1 = abs(a(p,p)) / sva(iwork(p)) maxprj = min( maxprj, temp1 ) end do if ( maxprj**2_${ik}$ >= one - real(n,KIND=sp)*epsln ) almort = .true. end if sconda = - one condr1 = - one condr2 = - one if ( errest ) then if ( n == nr ) then if ( rsvec ) then ! V Is Available As Workspace call stdlib${ii}$_clacpy( 'U', n, n, a, lda, v, ldv ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_csscal( p, one/temp1, v(1_${ik}$,p), 1_${ik}$ ) end do if ( lsvec )then call stdlib${ii}$_cpocon( 'U', n, v, ldv, one, temp1,cwork(n+1), rwork, ierr ) else call stdlib${ii}$_cpocon( 'U', n, v, ldv, one, temp1,cwork, rwork, ierr ) end if else if ( lsvec ) then ! U Is Available As Workspace call stdlib${ii}$_clacpy( 'U', n, n, a, lda, u, ldu ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_csscal( p, one/temp1, u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_cpocon( 'U', n, u, ldu, one, temp1,cwork(n+1), rwork, ierr ) else call stdlib${ii}$_clacpy( 'U', n, n, a, lda, cwork, n ) ! [] call stdlib${ii}$_clacpy( 'u', n, n, a, lda, cwork(n+1), n ) ! change: here index shifted by n to the left, cwork(1:n) ! not needed for sigma only computation do p = 1, n temp1 = sva(iwork(p)) ! [] call stdlib${ii}$_csscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 ) call stdlib${ii}$_csscal( p, one/temp1, cwork((p-1)*n+1), 1_${ik}$ ) end do ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths ! [] call stdlib${ii}$_cpocon( 'u', n, cwork(n+1), n, one, temp1, ! [] $ cwork(n+n*n+1), rwork, ierr ) call stdlib${ii}$_cpocon( 'U', n, cwork, n, one, temp1,cwork(n*n+1), rwork, ierr ) end if if ( temp1 /= zero ) then sconda = one / sqrt(temp1) else sconda = - one end if ! sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1). ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda else sconda = - one end if end if l2pert = l2pert .and. ( abs( a(1_${ik}$,1_${ik}$)/a(nr,nr) ) > sqrt(big1) ) ! if there is no violent scaling, artificial perturbation is not needed. ! phase 3: if ( .not. ( rsvec .or. lsvec ) ) then ! singular values only ! .. transpose a(1:nr,1:n) do p = 1, min( n-1, nr ) call stdlib${ii}$_ccopy( n-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( n-p+1, a(p,p), 1_${ik}$ ) end do if ( nr == n ) a(n,n) = conjg(a(n,n)) ! the following two do-loops introduce small relative perturbation ! into the strict upper triangle of the lower triangular matrix. ! small entries below the main diagonal are also changed. ! this modification is useful if the computing environment does not ! provide/allow flush to zero underflow, for it prevents many ! annoying denormalized numbers in case of strongly scaled matrices. ! the perturbation is structured so that it does not introduce any ! new perturbation of the singular values, and it does not destroy ! the job done by the preconditioner. ! the licence for this perturbation is in the variable l2pert, which ! should be .false. if flush to zero underflow is active. if ( .not. almort ) then if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=sp) do q = 1, nr ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=sp) do p = 1, n if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & ctemp ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero,czero, a(1_${ik}$,2_${ik}$),lda ) end if ! Second Preconditioning Using The Qr Factorization call stdlib${ii}$_cgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr ) ! And Transpose Upper To Lower Triangular do p = 1, nr - 1 call stdlib${ii}$_ccopy( nr-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( nr-p+1, a(p,p), 1_${ik}$ ) end do end if ! row-cyclic jacobi svd algorithm with column pivoting ! .. again some perturbation (a "background noise") is added ! to drown denormals if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=sp) do q = 1, nr ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=sp) do p = 1, nr if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & ctemp ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1, nr-1, czero, czero, a(1_${ik}$,2_${ik}$), lda ) end if ! .. and one-sided jacobi rotations are started on a lower ! triangular matrix (plus perturbation which is ignored in ! the part which destroys triangular form (confusing?!)) call stdlib${ii}$_cgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,n, v, ldv, cwork, lwork, & rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) else if ( ( rsvec .and. ( .not. lsvec ) .and. ( .not. jracc ) ).or.( jracc .and. ( & .not. lsvec ) .and. ( nr /= n ) ) ) then ! -> singular values and right singular vectors <- if ( almort ) then ! In This Case Nr Equals N do p = 1, nr call stdlib${ii}$_ccopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( n-p+1, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_cgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,cwork, lwork, & rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) else ! .. two more qr factorizations ( one qrf is not enough, two require ! accumulated product of jacobi rotations, three are perfect ) if (nr>1_${ik}$) call stdlib${ii}$_claset( 'L', nr-1,nr-1, czero, czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_cgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr) call stdlib${ii}$_clacpy( 'L', nr, nr, a, lda, v, ldv ) if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_cgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr call stdlib${ii}$_ccopy( nr-p+1, v(p,p), ldv, v(p,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( nr-p+1, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_claset('U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv) call stdlib${ii}$_cgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, cwork(n+1), & lwork-n, rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_claset( 'A',n-nr, nr, czero,czero, v(nr+1,1_${ik}$), ldv ) call stdlib${ii}$_claset( 'A',nr, n-nr, czero,czero, v(1_${ik}$,nr+1), ldv ) call stdlib${ii}$_claset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_cunmlq( 'L', 'C', n, n, nr, a, lda, cwork,v, ldv, cwork(n+1), lwork-n, & ierr ) end if ! Permute The Rows Of V ! do 8991 p = 1, n ! call stdlib${ii}$_ccopy( n, v(p,1), ldv, a(iwork(p),1), lda ) 8991 continue ! call stdlib${ii}$_clacpy( 'all', n, n, a, lda, v, ldv ) call stdlib${ii}$_clapmr( .false., n, n, v, ldv, iwork ) if ( transp ) then call stdlib${ii}$_clacpy( 'A', n, n, v, ldv, u, ldu ) end if else if ( jracc .and. (.not. lsvec) .and. ( nr== n ) ) then if (n>1_${ik}$) call stdlib${ii}$_claset( 'L', n-1,n-1, czero, czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_cgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,cwork, lwork, rwork, & lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) call stdlib${ii}$_clapmr( .false., n, n, v, ldv, iwork ) else if ( lsvec .and. ( .not. rsvec ) ) then ! Singular Values And Left Singular Vectors ! Second Preconditioning Step To Avoid Need To Accumulate ! jacobi rotations in the jacobi iterations. do p = 1, nr call stdlib${ii}$_ccopy( n-p+1, a(p,p), lda, u(p,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( n-p+1, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_cgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr - 1 call stdlib${ii}$_ccopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( n-p+1, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_cgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, cwork(n+1), lwork-& n, rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < m ) then call stdlib${ii}$_claset( 'A', m-nr, nr,czero, czero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_claset( 'A',nr, n1-nr, czero, czero, u(1_${ik}$,nr+1),ldu ) call stdlib${ii}$_claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu ) end if end if call stdlib${ii}$_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & ierr ) if ( rowpiv )call stdlib${ii}$_claswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) do p = 1, n1 xsc = one / stdlib${ii}$_scnrm2( m, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_csscal( m, xsc, u(1_${ik}$,p), 1_${ik}$ ) end do if ( transp ) then call stdlib${ii}$_clacpy( 'A', n, n, u, ldu, v, ldv ) end if else ! Full Svd if ( .not. jracc ) then if ( .not. almort ) then ! second preconditioning step (qrf [with pivoting]) ! note that the composition of transpose, qrf and transpose is ! equivalent to an lqf call. since in many libraries the qrf ! seems to be better optimized than the lqf, we do explicit ! transpose and use the qrf. this is subject to changes in an ! optimized implementation of stdlib${ii}$_cgejsv. do p = 1, nr call stdlib${ii}$_ccopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( n-p+1, v(p,p), 1_${ik}$ ) end do ! The Following Two Loops Perturb Small Entries To Avoid ! denormals in the second qr factorization, where they are ! as good as zeros. this is done to avoid painfully slow ! computation with denormals. the relative size of the perturbation ! is a parameter that can be changed by the implementer. ! this perturbation device will be obsolete on machines with ! properly implemented arithmetic. ! to switch it off, set l2pert=.false. to remove it from the ! code, remove the action under l2pert=.true., leave the else part. ! the following two loops should be blocked and fused with the ! transposed copy above. if ( l2pert ) then xsc = sqrt(small) do q = 1, nr ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=sp) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & ctemp ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) end if ! estimate the row scaled condition number of r1 ! (if r1 is rectangular, n > nr, then the condition number ! of the leading nr x nr submatrix is estimated.) call stdlib${ii}$_clacpy( 'L', nr, nr, v, ldv, cwork(2_${ik}$*n+1), nr ) do p = 1, nr temp1 = stdlib${ii}$_scnrm2(nr-p+1,cwork(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) call stdlib${ii}$_csscal(nr-p+1,one/temp1,cwork(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) end do call stdlib${ii}$_cpocon('L',nr,cwork(2_${ik}$*n+1),nr,one,temp1,cwork(2_${ik}$*n+nr*nr+1),rwork,& ierr) condr1 = one / sqrt(temp1) ! Here Need A Second Opinion On The Condition Number ! Then Assume Worst Case Scenario ! r1 is ok for inverse <=> condr1 < real(n,KIND=sp) ! more conservative <=> condr1 < sqrt(real(n,KIND=sp)) cond_ok = sqrt(sqrt(real(nr,KIND=sp))) ! [tp] cond_ok is a tuning parameter. if ( condr1 < cond_ok ) then ! .. the second qrf without pivoting. note: in an optimized ! implementation, this qrf should be implemented as the qrf ! of a lower triangular matrix. ! r1^* = q2 * r2 call stdlib${ii}$_cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small)/epsln do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=sp) if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) end do end do end if if ( nr /= n )call stdlib${ii}$_clacpy( 'A', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) ! .. save ... ! This Transposed Copy Should Be Better Than Naive do p = 1, nr - 1 call stdlib${ii}$_ccopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1_${ik}$ ) call stdlib${ii}$_clacgv(nr-p+1, v(p,p), 1_${ik}$ ) end do v(nr,nr)=conjg(v(nr,nr)) condr2 = condr1 else ! .. ill-conditioned case: second qrf with pivoting ! note that windowed pivoting would be equally good ! numerically, and more run-time efficient. so, in ! an optimal implementation, the next call to stdlib${ii}$_cgeqp3 ! should be replaced with eg. call cgeqpx (acm toms #782) ! with properly (carefully) chosen parameters. ! r1^* * p2 = q2 * r2 do p = 1, nr iwork(n+p) = 0_${ik}$ end do call stdlib${ii}$_cgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),cwork(2_${ik}$*n+1), lwork-& 2_${ik}$*n, rwork, ierr ) ! * call stdlib${ii}$_cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1), ! * $ lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=sp) if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) end do end do end if call stdlib${ii}$_clacpy( 'A', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=sp) ! v(p,q) = - temp1*( v(q,p) / abs(v(q,p)) ) v(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset( 'L',nr-1,nr-1,czero,czero,v(2_${ik}$,1_${ik}$),ldv ) end if ! now, compute r2 = l3 * q3, the lq factorization. call stdlib${ii}$_cgelqf( nr, nr, v, ldv, cwork(2_${ik}$*n+n*nr+1),cwork(2_${ik}$*n+n*nr+nr+1), & lwork-2*n-n*nr-nr, ierr ) ! And Estimate The Condition Number call stdlib${ii}$_clacpy( 'L',nr,nr,v,ldv,cwork(2_${ik}$*n+n*nr+nr+1),nr ) do p = 1, nr temp1 = stdlib${ii}$_scnrm2( p, cwork(2_${ik}$*n+n*nr+nr+p), nr ) call stdlib${ii}$_csscal( p, one/temp1, cwork(2_${ik}$*n+n*nr+nr+p), nr ) end do call stdlib${ii}$_cpocon( 'L',nr,cwork(2_${ik}$*n+n*nr+nr+1),nr,one,temp1,cwork(2_${ik}$*n+n*nr+& nr+nr*nr+1),rwork,ierr ) condr2 = one / sqrt(temp1) if ( condr2 >= cond_ok ) then ! Save The Householder Vectors Used For Q3 ! (this overwrites the copy of r2, as it will not be ! needed in this branch, but it does not overwritte the ! huseholder vectors of q2.). call stdlib${ii}$_clacpy( 'U', nr, nr, v, ldv, cwork(2_${ik}$*n+1), n ) ! And The Rest Of The Information On Q3 Is In ! work(2*n+n*nr+1:2*n+n*nr+n) end if end if if ( l2pert ) then xsc = sqrt(small) do q = 2, nr ctemp = xsc * v(q,q) do p = 1, q - 1 ! v(p,q) = - temp1*( v(p,q) / abs(v(p,q)) ) v(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) end if ! second preconditioning finished; continue with jacobi svd ! the input matrix is lower trinagular. ! recover the right singular vectors as solution of a well ! conditioned triangular matrix equation. if ( condr1 < cond_ok ) then call stdlib${ii}$_cgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,cwork(2_${ik}$*n+n*nr+nr+1)& ,lwork-2*n-n*nr-nr,rwork,lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_ccopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_csscal( nr, sva(p), v(1_${ik}$,p), 1_${ik}$ ) end do ! Pick The Right Matrix Equation And Solve It if ( nr == n ) then ! :)) .. best case, r1 is inverted. the solution of this matrix ! equation is q2*v2 = the product of the jacobi rotations ! used in stdlib${ii}$_cgesvj, premultiplied with the orthogonal matrix ! from the second qr factorization. call stdlib${ii}$_ctrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv) else ! .. r1 is well conditioned, but non-square. adjoint of r2 ! is inverted to get the product of the jacobi rotations ! used in stdlib${ii}$_cgesvj. the q-factor from the second qr ! factorization is then built in explicitly. call stdlib${ii}$_ctrsm('L','U','C','N',nr,nr,cone,cwork(2_${ik}$*n+1),n,v,ldv) if ( nr < n ) then call stdlib${ii}$_claset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_claset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_cunmqr('L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(& 2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) end if else if ( condr2 < cond_ok ) then ! the matrix r2 is inverted. the solution of the matrix equation ! is q3^* * v3 = the product of the jacobi rotations (appplied to ! the lower triangular l3 from the lq factorization of ! r2=l3*q3), pre-multiplied with the transposed q3. call stdlib${ii}$_cgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_ccopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_csscal( nr, sva(p), u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_ctrsm('L','U','N','N',nr,nr,cone,cwork(2_${ik}$*n+1),n,u,ldu) ! Apply The Permutation From The Second Qr Factorization do q = 1, nr do p = 1, nr cwork(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do end do if ( nr < n ) then call stdlib${ii}$_claset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_claset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_cunmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) else ! last line of defense. ! #:( this is a rather pathological case: no scaled condition ! improvement after two pivoted qr factorizations. other ! possibility is that the rank revealing qr factorization ! or the condition estimator has failed, or the cond_ok ! is set very close to one (which is unnecessary). normally, ! this branch should never be executed, but in rare cases of ! failure of the rrqr or condition estimator, the last line of ! defense ensures that stdlib${ii}$_cgejsv completes the task. ! compute the full svd of l3 using stdlib${ii}$_cgesvj with explicit ! accumulation of jacobi rotations. call stdlib${ii}$_cgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_claset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_claset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_cunmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) call stdlib${ii}$_cunmlq( 'L', 'C', nr, nr, nr, cwork(2_${ik}$*n+1), n,cwork(2_${ik}$*n+n*nr+1), & u, ldu, cwork(2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) do q = 1, nr do p = 1, nr cwork(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do end do end if ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=sp)) * epsln do q = 1, n do p = 1, n cwork(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_scnrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_csscal( n, xsc,& v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_claset('A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_claset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_claset('A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) end if end if ! the q matrix from the first qrf is built into the left singular ! matrix u. this applies to all cases. call stdlib${ii}$_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& n, ierr ) ! the columns of u are normalized. the cost is o(m*n) flops. temp1 = sqrt(real(m,KIND=sp)) * epsln do p = 1, nr xsc = one / stdlib${ii}$_scnrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_csscal( m, xsc,& u(1_${ik}$,p), 1_${ik}$ ) end do ! if the initial qrf is computed with row pivoting, the left ! singular vectors must be adjusted. if ( rowpiv )call stdlib${ii}$_claswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) else ! The Initial Matrix A Has Almost Orthogonal Columns And ! the second qrf is not needed call stdlib${ii}$_clacpy( 'U', n, n, a, lda, cwork(n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, n ctemp = xsc * cwork( n + (p-1)*n + p ) do q = 1, p - 1 ! cwork(n+(q-1)*n+p)=-temp1 * ( cwork(n+(p-1)*n+q) / ! $ abs(cwork(n+(p-1)*n+q)) ) cwork(n+(q-1)*n+p)=-ctemp end do end do else call stdlib${ii}$_claset( 'L',n-1,n-1,czero,czero,cwork(n+2),n ) end if call stdlib${ii}$_cgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,n, u, ldu, cwork(n+& n*n+1), lwork-n-n*n, rwork, lrwork,info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, n call stdlib${ii}$_ccopy( n, cwork(n+(p-1)*n+1), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_csscal( n, sva(p), cwork(n+(p-1)*n+1), 1_${ik}$ ) end do call stdlib${ii}$_ctrsm( 'L', 'U', 'N', 'N', n, n,cone, a, lda, cwork(n+1), n ) do p = 1, n call stdlib${ii}$_ccopy( n, cwork(n+p), n, v(iwork(p),1_${ik}$), ldv ) end do temp1 = sqrt(real(n,KIND=sp))*epsln do p = 1, n xsc = one / stdlib${ii}$_scnrm2( n, v(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_csscal( n, xsc,& v(1_${ik}$,p), 1_${ik}$ ) end do ! assemble the left singular vector matrix u (m x n). if ( n < m ) then call stdlib${ii}$_claset( 'A', m-n, n, czero, czero, u(n+1,1_${ik}$), ldu ) if ( n < n1 ) then call stdlib${ii}$_claset('A',n, n1-n, czero, czero, u(1_${ik}$,n+1),ldu) call stdlib${ii}$_claset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu) end if end if call stdlib${ii}$_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& n, ierr ) temp1 = sqrt(real(m,KIND=sp))*epsln do p = 1, n1 xsc = one / stdlib${ii}$_scnrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_csscal( m, xsc,& u(1_${ik}$,p), 1_${ik}$ ) end do if ( rowpiv )call stdlib${ii}$_claswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) end if ! end of the >> almost orthogonal case << in the full svd else ! this branch deploys a preconditioned jacobi svd with explicitly ! accumulated rotations. it is included as optional, mainly for ! experimental purposes. it does perform well, and can also be used. ! in this implementation, this branch will be automatically activated ! if the condition number sigma_max(a) / sigma_min(a) is predicted ! to be greater than the overflow threshold. this is because the ! a posteriori computation of the singular vectors assumes robust ! implementation of blas and some lapack procedures, capable of working ! in presence of extreme values, e.g. when the singular values spread from ! the underflow to the overflow threshold. do p = 1, nr call stdlib${ii}$_ccopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( n-p+1, v(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 1, nr ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=sp) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & ctemp ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset( 'U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) end if call stdlib${ii}$_cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) call stdlib${ii}$_clacpy( 'L', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) do p = 1, nr call stdlib${ii}$_ccopy( nr-p+1, v(p,p), ldv, u(p,p), 1_${ik}$ ) call stdlib${ii}$_clacgv( nr-p+1, u(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 2, nr do p = 1, q - 1 ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),zero,KIND=sp) ! u(p,q) = - temp1 * ( u(q,p) / abs(u(q,p)) ) u(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_claset('U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) end if call stdlib${ii}$_cgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, cwork(2_${ik}$*n+n*nr+1),& lwork-2*n-n*nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_claset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_claset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_claset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_cunmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+n*nr+& nr+1),lwork-2*n-n*nr-nr,ierr ) ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=sp)) * epsln do q = 1, n do p = 1, n cwork(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_scnrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_csscal( n, xsc,& v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_claset( 'A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_claset('A',nr, n1-nr, czero, czero, u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_claset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu) end if end if call stdlib${ii}$_cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & ierr ) if ( rowpiv )call stdlib${ii}$_claswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) end if if ( transp ) then ! .. swap u and v because the procedure worked on a^* do p = 1, n call stdlib${ii}$_cswap( n, u(1_${ik}$,p), 1_${ik}$, v(1_${ik}$,p), 1_${ik}$ ) end do end if end if ! end of the full svd ! undo scaling, if necessary (and possible) if ( uscal2 <= (big/sva(1_${ik}$))*uscal1 ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, uscal1, uscal2, nr, 1_${ik}$, sva, n, ierr ) uscal1 = one uscal2 = one end if if ( nr < n ) then do p = nr+1, n sva(p) = zero end do end if rwork(1_${ik}$) = uscal2 * scalem rwork(2_${ik}$) = uscal1 if ( errest ) rwork(3_${ik}$) = sconda if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = condr1 rwork(5_${ik}$) = condr2 end if if ( l2tran ) then rwork(6_${ik}$) = entra rwork(7_${ik}$) = entrat end if iwork(1_${ik}$) = nr iwork(2_${ik}$) = numrank iwork(3_${ik}$) = warning if ( transp ) then iwork(4_${ik}$) = 1_${ik}$ else iwork(4_${ik}$) = -1_${ik}$ end if return end subroutine stdlib${ii}$_cgejsv pure module subroutine stdlib${ii}$_zgejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & !! ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N !! matrix [A], where M >= N. The SVD of [A] is written as !! [A] = [U] * [SIGMA] * [V]^*, !! where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N !! diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and !! [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are !! the singular values of [A]. The columns of [U] and [V] are the left and !! the right singular vectors of [A], respectively. The matrices [U] and [V] !! are computed and stored in the arrays U and V, respectively. The diagonal !! of [SIGMA] is computed and stored in the array SVA. v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldv, lwork, lrwork, m, n ! Array Arguments complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: u(ldu,*), v(ldv,*), cwork(lwork) real(dp), intent(out) :: sva(n), rwork(lrwork) integer(${ik}$), intent(out) :: iwork(*) character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv ! =========================================================================== ! Local Scalars complex(dp) :: ctemp real(dp) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc integer(${ik}$) :: ierr, n1, nr, numrank, p, q, warning logical(lk) :: almort, defr, errest, goscal, jracc, kill, lquery, lsvec, l2aber, & l2kill, l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp integer(${ik}$) :: optwrk, minwrk, minrwrk, miniwrk integer(${ik}$) :: lwcon, lwlqf, lwqp3, lwqrf, lwunmlq, lwunmqr, lwunmqrm, lwsvdj, & lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff integer(${ik}$) :: lwrk_zgelqf, lwrk_zgeqp3, lwrk_zgeqp3n, lwrk_zgeqrf, lwrk_zgesvj, & lwrk_zgesvjv, lwrk_zgesvju, lwrk_zunmlq, lwrk_zunmqr, lwrk_zunmqrm ! Local Arrays complex(dp) :: cdummy(1_${ik}$) real(dp) :: rdummy(1_${ik}$) ! Intrinsic Functions ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) jracc = stdlib_lsame( jobv, 'J' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) l2rank = stdlib_lsame( joba, 'R' ) l2aber = stdlib_lsame( joba, 'A' ) errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) l2tran = stdlib_lsame( jobt, 'T' ) .and. ( m == n ) l2kill = stdlib_lsame( jobr, 'R' ) defr = stdlib_lsame( jobr, 'N' ) l2pert = stdlib_lsame( jobp, 'P' ) lquery = ( lwork == -1_${ik}$ ) .or. ( lrwork == -1_${ik}$ ) if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & then info = - 1_${ik}$ else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.( stdlib_lsame( jobu, 'W' ) & .and. rsvec .and. l2tran ) ) ) then info = - 2_${ik}$ else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.( stdlib_lsame( jobv, 'W' ) & .and. lsvec .and. l2tran ) ) ) then info = - 3_${ik}$ else if ( .not. ( l2kill .or. defr ) ) then info = - 4_${ik}$ else if ( .not. ( stdlib_lsame(jobt,'T') .or. stdlib_lsame(jobt,'N') ) ) then info = - 5_${ik}$ else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then info = - 6_${ik}$ else if ( m < 0_${ik}$ ) then info = - 7_${ik}$ else if ( ( n < 0_${ik}$ ) .or. ( n > m ) ) then info = - 8_${ik}$ else if ( lda < m ) then info = - 10_${ik}$ else if ( lsvec .and. ( ldu < m ) ) then info = - 13_${ik}$ else if ( rsvec .and. ( ldv < n ) ) then info = - 15_${ik}$ else ! #:) info = 0_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lcwork, lrwork are written with a lot of redundancy and ! can be simplified. however, this verbose form is useful for ! maintenance and modifications of the code.]] ! .. minimal workspace length for stdlib${ii}$_zgeqp3 of an m x n matrix, ! stdlib${ii}$_zgeqrf of an n x n matrix, stdlib${ii}$_zgelqf of an n x n matrix, ! stdlib${ii}$_zunmlq for computing n x n matrix, stdlib${ii}$_zunmqr for computing n x n ! matrix, stdlib${ii}$_zunmqr for computing m x n matrix, respectively. lwqp3 = n+1 lwqrf = max( 1_${ik}$, n ) lwlqf = max( 1_${ik}$, n ) lwunmlq = max( 1_${ik}$, n ) lwunmqr = max( 1_${ik}$, n ) lwunmqrm = max( 1_${ik}$, m ) ! Minimal Workspace Length For Stdlib_Zpocon Of An N X N Matrix lwcon = 2_${ik}$ * n ! .. minimal workspace length for stdlib${ii}$_zgesvj of an n x n matrix, ! without and with explicit accumulation of jacobi rotations lwsvdj = max( 2_${ik}$ * n, 1_${ik}$ ) lwsvdjv = max( 2_${ik}$ * n, 1_${ik}$ ) ! .. minimal real workspace length for stdlib${ii}$_zgeqp3, stdlib${ii}$_zpocon, stdlib${ii}$_zgesvj lrwqp3 = 2_${ik}$ * n lrwcon = n lrwsvdj = n if ( lquery ) then call stdlib${ii}$_zgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1_${ik}$,rdummy, ierr ) lwrk_zgeqp3 = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zgeqrf( n, n, a, lda, cdummy, cdummy,-1_${ik}$, ierr ) lwrk_zgeqrf = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zgelqf( n, n, a, lda, cdummy, cdummy,-1_${ik}$, ierr ) lwrk_zgelqf = real( cdummy(1_${ik}$),KIND=dp) end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ miniwrk = n if ( .not. (lsvec .or. rsvec ) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If ! only the singular values are requested if ( errest ) then minwrk = max( n+lwqp3, n**2_${ik}$+lwcon, n+lwqrf, lwsvdj ) else minwrk = max( n+lwqp3, n+lwqrf, lwsvdj ) end if if ( lquery ) then call stdlib${ii}$_zgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,ldv, cdummy, -1_${ik}$,& rdummy, -1_${ik}$, ierr ) lwrk_zgesvj = real( cdummy(1_${ik}$),KIND=dp) if ( errest ) then optwrk = max( n+lwrk_zgeqp3, n**2_${ik}$+lwcon,n+lwrk_zgeqrf, lwrk_zgesvj ) else optwrk = max( n+lwrk_zgeqp3, n+lwrk_zgeqrf,lwrk_zgesvj ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwcon, lrwsvdj ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwcon, lrwsvdj ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the right singular vectors are requested if ( errest ) then minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,2_${ik}$*n+lwqrf, n+lwsvdj, n+& lwunmlq ) else minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2_${ik}$*n+lwqrf,n+lwsvdj, n+lwunmlq ) end if if ( lquery ) then call stdlib${ii}$_zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1_${ik}$, & rdummy, -1_${ik}$, ierr ) lwrk_zgesvj = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -1_${ik}$, & ierr ) lwrk_zunmlq = real( cdummy(1_${ik}$),KIND=dp) if ( errest ) then optwrk = max( n+lwrk_zgeqp3, lwcon, lwrk_zgesvj,n+lwrk_zgelqf, 2_${ik}$*n+& lwrk_zgeqrf,n+lwrk_zgesvj, n+lwrk_zunmlq ) else optwrk = max( n+lwrk_zgeqp3, lwrk_zgesvj,n+lwrk_zgelqf,2_${ik}$*n+lwrk_zgeqrf, n+& lwrk_zgesvj,n+lwrk_zunmlq ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the left singular vectors are requested if ( errest ) then minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm ) else minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm ) end if if ( lquery ) then call stdlib${ii}$_zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1_${ik}$, & rdummy, -1_${ik}$, ierr ) lwrk_zgesvj = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_zunmqrm = real( cdummy(1_${ik}$),KIND=dp) if ( errest ) then optwrk = n + max( lwrk_zgeqp3, lwcon, n+lwrk_zgeqrf,lwrk_zgesvj, & lwrk_zunmqrm ) else optwrk = n + max( lwrk_zgeqp3, n+lwrk_zgeqrf,lwrk_zgesvj, lwrk_zunmqrm ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else ! Minimal And Optimal Sizes Of The Complex Workspace If The ! full svd is requested if ( .not. jracc ) then if ( errest ) then minwrk = max( n+lwqp3, n+lwcon, 2_${ik}$*n+n**2_${ik}$+lwcon,2_${ik}$*n+lwqrf, 2_${ik}$*n+& lwqp3,2_${ik}$*n+n**2_${ik}$+n+lwlqf, 2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+lwsvdj, 2_${ik}$*n+& n**2_${ik}$+n+lwsvdjv,2_${ik}$*n+n**2_${ik}$+n+lwunmqr,2_${ik}$*n+n**2_${ik}$+n+lwunmlq,n+n**2_${ik}$+lwsvdj, n+& lwunmqrm ) else minwrk = max( n+lwqp3, 2_${ik}$*n+n**2_${ik}$+lwcon,2_${ik}$*n+lwqrf, 2_${ik}$*n+& lwqp3,2_${ik}$*n+n**2_${ik}$+n+lwlqf, 2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+lwsvdj, 2_${ik}$*n+& n**2_${ik}$+n+lwsvdjv,2_${ik}$*n+n**2_${ik}$+n+lwunmqr,2_${ik}$*n+n**2_${ik}$+n+lwunmlq,n+n**2_${ik}$+lwsvdj, & n+lwunmqrm ) end if miniwrk = miniwrk + n if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( errest ) then minwrk = max( n+lwqp3, n+lwcon, 2_${ik}$*n+lwqrf,2_${ik}$*n+n**2_${ik}$+lwsvdjv, 2_${ik}$*n+n**2_${ik}$+n+& lwunmqr,n+lwunmqrm ) else minwrk = max( n+lwqp3, 2_${ik}$*n+lwqrf,2_${ik}$*n+n**2_${ik}$+lwsvdjv, 2_${ik}$*n+n**2_${ik}$+n+lwunmqr,n+& lwunmqrm ) end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m end if if ( lquery ) then call stdlib${ii}$_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_zunmqrm = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_zunmqr = real( cdummy(1_${ik}$),KIND=dp) if ( .not. jracc ) then call stdlib${ii}$_zgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1_${ik}$,rdummy, ierr ) lwrk_zgeqp3n = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_zgesvj = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_zgesvju = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_zgesvjv = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -& 1_${ik}$, ierr ) lwrk_zunmlq = real( cdummy(1_${ik}$),KIND=dp) if ( errest ) then optwrk = max( n+lwrk_zgeqp3, n+lwcon,2_${ik}$*n+n**2_${ik}$+lwcon, 2_${ik}$*n+lwrk_zgeqrf,& 2_${ik}$*n+lwrk_zgeqp3n,2_${ik}$*n+n**2_${ik}$+n+lwrk_zgelqf,2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+& n**2_${ik}$+n+lwrk_zgesvj,2_${ik}$*n+n**2_${ik}$+n+lwrk_zgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_zunmqr,2_${ik}$*n+& n**2_${ik}$+n+lwrk_zunmlq,n+n**2_${ik}$+lwrk_zgesvju,n+lwrk_zunmqrm ) else optwrk = max( n+lwrk_zgeqp3,2_${ik}$*n+n**2_${ik}$+lwcon, 2_${ik}$*n+lwrk_zgeqrf,2_${ik}$*n+& lwrk_zgeqp3n,2_${ik}$*n+n**2_${ik}$+n+lwrk_zgelqf,2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+& lwrk_zgesvj,2_${ik}$*n+n**2_${ik}$+n+lwrk_zgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_zunmqr,2_${ik}$*n+n**2_${ik}$+n+& lwrk_zunmlq,n+n**2_${ik}$+lwrk_zgesvju,n+lwrk_zunmqrm ) end if else call stdlib${ii}$_zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_zgesvjv = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,v, ldv, cdummy,& -1_${ik}$, ierr ) lwrk_zunmqr = real( cdummy(1_${ik}$),KIND=dp) call stdlib${ii}$_zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -& 1_${ik}$, ierr ) lwrk_zunmqrm = real( cdummy(1_${ik}$),KIND=dp) if ( errest ) then optwrk = max( n+lwrk_zgeqp3, n+lwcon,2_${ik}$*n+lwrk_zgeqrf, 2_${ik}$*n+n**2_${ik}$,2_${ik}$*n+& n**2_${ik}$+lwrk_zgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_zunmqr,n+lwrk_zunmqrm ) else optwrk = max( n+lwrk_zgeqp3, 2_${ik}$*n+lwrk_zgeqrf,2_${ik}$*n+n**2_${ik}$, 2_${ik}$*n+n**2_${ik}$+& lwrk_zgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_zunmqr,n+lwrk_zunmqrm ) end if end if end if if ( l2tran .or. rowpiv ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( minwrk, optwrk ) if ( lwork < minwrk .and. (.not.lquery) ) info = - 17_${ik}$ if ( lrwork < minrwrk .and. (.not.lquery) ) info = - 19_${ik}$ end if if ( info /= 0_${ik}$ ) then ! #:( call stdlib${ii}$_xerbla( 'ZGEJSV', - info ) return else if ( lquery ) then cwork(1_${ik}$) = optwrk cwork(2_${ik}$) = minwrk rwork(1_${ik}$) = minrwrk iwork(1_${ik}$) = max( 4_${ik}$, miniwrk ) return end if ! quick return for void matrix (y3k safe) ! #:) if ( ( m == 0_${ik}$ ) .or. ( n == 0_${ik}$ ) ) then iwork(1_${ik}$:4_${ik}$) = 0_${ik}$ rwork(1_${ik}$:7_${ik}$) = 0_${ik}$ return endif ! determine whether the matrix u should be m x n or m x m if ( lsvec ) then n1 = n if ( stdlib_lsame( jobu, 'F' ) ) n1 = m end if ! set numerical parameters ! ! note: make sure stdlib${ii}$_dlamch() does not fail on the target architecture. epsln = stdlib${ii}$_dlamch('EPSILON') sfmin = stdlib${ii}$_dlamch('SAFEMINIMUM') small = sfmin / epsln big = stdlib${ii}$_dlamch('O') ! big = one / sfmin ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n ! (!) if necessary, scale sva() to protect the largest norm from ! overflow. it is possible that this scaling pushes the smallest ! column norm left from the underflow threshold (extreme case). scalem = one / sqrt(real(m,KIND=dp)*real(n,KIND=dp)) noscal = .true. goscal = .true. do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_zlassq( m, a(1_${ik}$,p), 1_${ik}$, aapp, aaqq ) if ( aapp > big ) then info = - 9_${ik}$ call stdlib${ii}$_xerbla( 'ZGEJSV', -info ) return end if aaqq = sqrt(aaqq) if ( ( aapp < (big / aaqq) ) .and. noscal ) then sva(p) = aapp * aaqq else noscal = .false. sva(p) = aapp * ( aaqq * scalem ) if ( goscal ) then goscal = .false. call stdlib${ii}$_dscal( p-1, scalem, sva, 1_${ik}$ ) end if end if end do if ( noscal ) scalem = one aapp = zero aaqq = big do p = 1, n aapp = max( aapp, sva(p) ) if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) end do ! quick return for zero m x n matrix ! #:) if ( aapp == zero ) then if ( lsvec ) call stdlib${ii}$_zlaset( 'G', m, n1, czero, cone, u, ldu ) if ( rsvec ) call stdlib${ii}$_zlaset( 'G', n, n, czero, cone, v, ldv ) rwork(1_${ik}$) = one rwork(2_${ik}$) = one if ( errest ) rwork(3_${ik}$) = one if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = one rwork(5_${ik}$) = one end if if ( l2tran ) then rwork(6_${ik}$) = zero rwork(7_${ik}$) = zero end if iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ iwork(3_${ik}$) = 0_${ik}$ iwork(4_${ik}$) = -1_${ik}$ return end if ! issue warning if denormalized column norms detected. override the ! high relative accuracy request. issue licence to kill nonzero columns ! (set them to zero) whose norm is less than sigma_max / big (roughly). ! #:( warning = 0_${ik}$ if ( aaqq <= sfmin ) then l2rank = .true. l2kill = .true. warning = 1_${ik}$ end if ! quick return for one-column matrix ! #:) if ( n == 1_${ik}$ ) then if ( lsvec ) then call stdlib${ii}$_zlascl( 'G',0_${ik}$,0_${ik}$,sva(1_${ik}$),scalem, m,1_${ik}$,a(1_${ik}$,1_${ik}$),lda,ierr ) call stdlib${ii}$_zlacpy( 'A', m, 1_${ik}$, a, lda, u, ldu ) ! computing all m left singular vectors of the m x 1 matrix if ( n1 /= n ) then call stdlib${ii}$_zgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr ) call stdlib${ii}$_zungqr( m,n1,1_${ik}$, u,ldu,cwork,cwork(n+1),lwork-n,ierr ) call stdlib${ii}$_zcopy( m, a(1_${ik}$,1_${ik}$), 1_${ik}$, u(1_${ik}$,1_${ik}$), 1_${ik}$ ) end if end if if ( rsvec ) then v(1_${ik}$,1_${ik}$) = cone end if if ( sva(1_${ik}$) < (big*scalem) ) then sva(1_${ik}$) = sva(1_${ik}$) / scalem scalem = one end if rwork(1_${ik}$) = one / scalem rwork(2_${ik}$) = one if ( sva(1_${ik}$) /= zero ) then iwork(1_${ik}$) = 1_${ik}$ if ( ( sva(1_${ik}$) / scalem) >= sfmin ) then iwork(2_${ik}$) = 1_${ik}$ else iwork(2_${ik}$) = 0_${ik}$ end if else iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ end if iwork(3_${ik}$) = 0_${ik}$ iwork(4_${ik}$) = -1_${ik}$ if ( errest ) rwork(3_${ik}$) = one if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = one rwork(5_${ik}$) = one end if if ( l2tran ) then rwork(6_${ik}$) = zero rwork(7_${ik}$) = zero end if return end if transp = .false. aatmax = -one aatmin = big if ( rowpiv .or. l2tran ) then ! compute the row norms, needed to determine row pivoting sequence ! (in the case of heavily row weighted a, row pivoting is strongly ! advised) and to collect information needed to compare the ! structures of a * a^* and a^* * a (in the case l2tran==.true.). if ( l2tran ) then do p = 1, m xsc = zero temp1 = one call stdlib${ii}$_zlassq( n, a(p,1_${ik}$), lda, xsc, temp1 ) ! stdlib${ii}$_zlassq gets both the ell_2 and the ell_infinity norm ! in one pass through the vector rwork(m+p) = xsc * scalem rwork(p) = xsc * (scalem*sqrt(temp1)) aatmax = max( aatmax, rwork(p) ) if (rwork(p) /= zero)aatmin = min(aatmin,rwork(p)) end do else do p = 1, m rwork(m+p) = scalem*abs( a(p,stdlib${ii}$_izamax(n,a(p,1_${ik}$),lda)) ) aatmax = max( aatmax, rwork(m+p) ) aatmin = min( aatmin, rwork(m+p) ) end do end if end if ! for square matrix a try to determine whether a^* would be better ! input for the preconditioned jacobi svd, with faster convergence. ! the decision is based on an o(n) function of the vector of column ! and row norms of a, based on the shannon entropy. this should give ! the right choice in most cases when the difference actually matters. ! it may fail and pick the slower converging side. entra = zero entrat = zero if ( l2tran ) then xsc = zero temp1 = one call stdlib${ii}$_dlassq( n, sva, 1_${ik}$, xsc, temp1 ) temp1 = one / temp1 entra = zero do p = 1, n big1 = ( ( sva(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entra = entra + big1 * log(big1) end do entra = - entra / log(real(n,KIND=dp)) ! now, sva().^2/trace(a^* * a) is a point in the probability simplex. ! it is derived from the diagonal of a^* * a. do the same with the ! diagonal of a * a^*, compute the entropy of the corresponding ! probability distribution. note that a * a^* and a^* * a have the ! same trace. entrat = zero do p = 1, m big1 = ( ( rwork(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entrat = entrat + big1 * log(big1) end do entrat = - entrat / log(real(m,KIND=dp)) ! analyze the entropies and decide a or a^*. smaller entropy ! usually means better input for the algorithm. transp = ( entrat < entra ) ! if a^* is better than a, take the adjoint of a. this is allowed ! only for square matrices, m=n. if ( transp ) then ! in an optimal implementation, this trivial transpose ! should be replaced with faster transpose. do p = 1, n - 1 a(p,p) = conjg(a(p,p)) do q = p + 1, n ctemp = conjg(a(q,p)) a(q,p) = conjg(a(p,q)) a(p,q) = ctemp end do end do a(n,n) = conjg(a(n,n)) do p = 1, n rwork(m+p) = sva(p) sva(p) = rwork(p) ! previously computed row 2-norms are now column 2-norms ! of the transposed matrix end do temp1 = aapp aapp = aatmax aatmax = temp1 temp1 = aaqq aaqq = aatmin aatmin = temp1 kill = lsvec lsvec = rsvec rsvec = kill if ( lsvec ) n1 = n rowpiv = .true. end if end if ! end if l2tran ! scale the matrix so that its maximal singular value remains less ! than sqrt(big) -- the matrix is scaled so that its maximal column ! has euclidean norm equal to sqrt(big/n). the only reason to keep ! sqrt(big) instead of big is the fact that stdlib${ii}$_zgejsv uses lapack and ! blas routines that, in some implementations, are not capable of ! working in the full interval [sfmin,big] and that they may provoke ! overflows in the intermediate results. if the singular values spread ! from sfmin to big, then stdlib${ii}$_zgesvj will compute them. so, in that case, ! one should use stdlib_zgesvj instead of stdlib${ii}$_zgejsv. ! >> change in the april 2016 update: allow bigger range, i.e. the ! largest column is allowed up to big/n and stdlib${ii}$_zgesvj will do the rest. big1 = sqrt( big ) temp1 = sqrt( big / real(n,KIND=dp) ) ! temp1 = big/real(n,KIND=dp) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, n, 1_${ik}$, sva, n, ierr ) if ( aaqq > (aapp * sfmin) ) then aaqq = ( aaqq / aapp ) * temp1 else aaqq = ( aaqq * temp1 ) / aapp end if temp1 = temp1 * scalem call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, m, n, a, lda, ierr ) ! to undo scaling at the end of this procedure, multiply the ! computed singular values with uscal2 / uscal1. uscal1 = temp1 uscal2 = aapp if ( l2kill ) then ! l2kill enforces computation of nonzero singular values in ! the restricted range of condition number of the initial a, ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). xsc = sqrt( sfmin ) else xsc = small ! now, if the condition number of a is too big, ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, ! as a precaution measure, the full svd is computed using stdlib${ii}$_zgesvj ! with accumulated jacobi rotations. this provides numerically ! more robust computation, at the cost of slightly increased run ! time. depending on the concrete implementation of blas and lapack ! (i.e. how they behave in presence of extreme ill-conditioning) the ! implementor may decide to remove this switch. if ( ( aaqq<sqrt(sfmin) ) .and. lsvec .and. rsvec ) then jracc = .true. end if end if if ( aaqq < xsc ) then do p = 1, n if ( sva(p) < xsc ) then call stdlib${ii}$_zlaset( 'A', m, 1_${ik}$, czero, czero, a(1_${ik}$,p), lda ) sva(p) = zero end if end do end if ! preconditioning using qr factorization with pivoting if ( rowpiv ) then ! optional row permutation (bjoerck row pivoting): ! a result by cox and higham shows that the bjoerck's ! row pivoting combined with standard column pivoting ! has similar effect as powell-reid complete pivoting. ! the ell-infinity norms of a are made nonincreasing. if ( ( lsvec .and. rsvec ) .and. .not.( jracc ) ) then iwoff = 2_${ik}$*n else iwoff = n end if do p = 1, m - 1 q = stdlib${ii}$_idamax( m-p+1, rwork(m+p), 1_${ik}$ ) + p - 1_${ik}$ iwork(iwoff+p) = q if ( p /= q ) then temp1 = rwork(m+p) rwork(m+p) = rwork(m+q) rwork(m+q) = temp1 end if end do call stdlib${ii}$_zlaswp( n, a, lda, 1_${ik}$, m-1, iwork(iwoff+1), 1_${ik}$ ) end if ! end of the preparation phase (scaling, optional sorting and ! transposing, optional flushing of small columns). ! preconditioning ! if the full svd is needed, the right singular vectors are computed ! from a matrix equation, and for that we need theoretical analysis ! of the businger-golub pivoting. so we use stdlib_zgeqp3 as the first rr qrf. ! in all other cases the first rr qrf can be chosen by other criteria ! (eg speed by replacing global with restricted window pivoting, such ! as in xgeqpx from toms # 782). good results will be obtained using ! xgeqpx with properly (!) chosen numerical parameters. ! any improvement of stdlib${ii}$_zgeqp3 improves overall performance of stdlib${ii}$_zgejsv. ! a * p1 = q1 * [ r1^* 0]^*: do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,rwork, ierr ) ! the upper triangular matrix r1 from the first qrf is inspected for ! rank deficiency and possibilities for deflation, or possible ! ill-conditioning. depending on the user specified flag l2rank, ! the procedure explores possibilities to reduce the numerical ! rank by inspecting the computed upper triangular factor. if ! l2rank or l2aber are up, then stdlib${ii}$_zgejsv will compute the svd of ! a + da, where ||da|| <= f(m,n)*epsln. nr = 1_${ik}$ if ( l2aber ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*epsln*||a|| are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*epsln*||a||. temp1 = sqrt(real(n,KIND=dp))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) >= (temp1*abs(a(1_${ik}$,1_${ik}$))) ) then nr = nr + 1_${ik}$ else exit loop_3002 end if end do loop_3002 else if ( l2rank ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r1 is used as the criterion for ! close-to-rank-deficient. temp1 = sqrt(sfmin) loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! the goal is high relative accuracy. however, if the matrix ! has high scaled condition number the relative accuracy is in ! general not feasible. later on, a condition number estimator ! will be deployed to estimate the scaled condition number. ! here we just remove the underflowed part of the triangular ! factor. this prevents the situation in which the code is ! working hard to get the accuracy not warranted by the data. temp1 = sqrt(sfmin) loop_3302: do p = 2, n if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3302 nr = nr + 1_${ik}$ end do loop_3302 end if almort = .false. if ( nr == n ) then maxprj = one do p = 2, n temp1 = abs(a(p,p)) / sva(iwork(p)) maxprj = min( maxprj, temp1 ) end do if ( maxprj**2_${ik}$ >= one - real(n,KIND=dp)*epsln ) almort = .true. end if sconda = - one condr1 = - one condr2 = - one if ( errest ) then if ( n == nr ) then if ( rsvec ) then ! V Is Available As Workspace call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, v, ldv ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_zdscal( p, one/temp1, v(1_${ik}$,p), 1_${ik}$ ) end do if ( lsvec )then call stdlib${ii}$_zpocon( 'U', n, v, ldv, one, temp1,cwork(n+1), rwork, ierr ) else call stdlib${ii}$_zpocon( 'U', n, v, ldv, one, temp1,cwork, rwork, ierr ) end if else if ( lsvec ) then ! U Is Available As Workspace call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, u, ldu ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_zdscal( p, one/temp1, u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_zpocon( 'U', n, u, ldu, one, temp1,cwork(n+1), rwork, ierr ) else call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, cwork, n ) ! [] call stdlib${ii}$_zlacpy( 'u', n, n, a, lda, cwork(n+1), n ) ! change: here index shifted by n to the left, cwork(1:n) ! not needed for sigma only computation do p = 1, n temp1 = sva(iwork(p)) ! [] call stdlib${ii}$_zdscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 ) call stdlib${ii}$_zdscal( p, one/temp1, cwork((p-1)*n+1), 1_${ik}$ ) end do ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths ! [] call stdlib${ii}$_zpocon( 'u', n, cwork(n+1), n, one, temp1, ! [] $ cwork(n+n*n+1), rwork, ierr ) call stdlib${ii}$_zpocon( 'U', n, cwork, n, one, temp1,cwork(n*n+1), rwork, ierr ) end if if ( temp1 /= zero ) then sconda = one / sqrt(temp1) else sconda = - one end if ! sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1). ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda else sconda = - one end if end if l2pert = l2pert .and. ( abs( a(1_${ik}$,1_${ik}$)/a(nr,nr) ) > sqrt(big1) ) ! if there is no violent scaling, artificial perturbation is not needed. ! phase 3: if ( .not. ( rsvec .or. lsvec ) ) then ! singular values only ! .. transpose a(1:nr,1:n) do p = 1, min( n-1, nr ) call stdlib${ii}$_zcopy( n-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( n-p+1, a(p,p), 1_${ik}$ ) end do if ( nr == n ) a(n,n) = conjg(a(n,n)) ! the following two do-loops introduce small relative perturbation ! into the strict upper triangle of the lower triangular matrix. ! small entries below the main diagonal are also changed. ! this modification is useful if the computing environment does not ! provide/allow flush to zero underflow, for it prevents many ! annoying denormalized numbers in case of strongly scaled matrices. ! the perturbation is structured so that it does not introduce any ! new perturbation of the singular values, and it does not destroy ! the job done by the preconditioner. ! the licence for this perturbation is in the variable l2pert, which ! should be .false. if flush to zero underflow is active. if ( .not. almort ) then if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=dp) do q = 1, nr ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=dp) do p = 1, n if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & ctemp ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero,czero, a(1_${ik}$,2_${ik}$),lda ) end if ! Second Preconditioning Using The Qr Factorization call stdlib${ii}$_zgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr ) ! And Transpose Upper To Lower Triangular do p = 1, nr - 1 call stdlib${ii}$_zcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( nr-p+1, a(p,p), 1_${ik}$ ) end do end if ! row-cyclic jacobi svd algorithm with column pivoting ! .. again some perturbation (a "background noise") is added ! to drown denormals if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=dp) do q = 1, nr ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=dp) do p = 1, nr if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & ctemp ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1, nr-1, czero, czero, a(1_${ik}$,2_${ik}$), lda ) end if ! .. and one-sided jacobi rotations are started on a lower ! triangular matrix (plus perturbation which is ignored in ! the part which destroys triangular form (confusing?!)) call stdlib${ii}$_zgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,n, v, ldv, cwork, lwork, & rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) else if ( ( rsvec .and. ( .not. lsvec ) .and. ( .not. jracc ) ).or.( jracc .and. ( & .not. lsvec ) .and. ( nr /= n ) ) ) then ! -> singular values and right singular vectors <- if ( almort ) then ! In This Case Nr Equals N do p = 1, nr call stdlib${ii}$_zcopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( n-p+1, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_zgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,cwork, lwork, & rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) else ! .. two more qr factorizations ( one qrf is not enough, two require ! accumulated product of jacobi rotations, three are perfect ) if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'L', nr-1,nr-1, czero, czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_zgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr) call stdlib${ii}$_zlacpy( 'L', nr, nr, a, lda, v, ldv ) if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_zgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr call stdlib${ii}$_zcopy( nr-p+1, v(p,p), ldv, v(p,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( nr-p+1, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_zlaset('U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv) call stdlib${ii}$_zgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, cwork(n+1), & lwork-n, rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_zlaset( 'A',n-nr, nr, czero,czero, v(nr+1,1_${ik}$), ldv ) call stdlib${ii}$_zlaset( 'A',nr, n-nr, czero,czero, v(1_${ik}$,nr+1), ldv ) call stdlib${ii}$_zlaset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_zunmlq( 'L', 'C', n, n, nr, a, lda, cwork,v, ldv, cwork(n+1), lwork-n, & ierr ) end if ! Permute The Rows Of V ! do 8991 p = 1, n ! call stdlib${ii}$_zcopy( n, v(p,1), ldv, a(iwork(p),1), lda ) 8991 continue ! call stdlib${ii}$_zlacpy( 'all', n, n, a, lda, v, ldv ) call stdlib${ii}$_zlapmr( .false., n, n, v, ldv, iwork ) if ( transp ) then call stdlib${ii}$_zlacpy( 'A', n, n, v, ldv, u, ldu ) end if else if ( jracc .and. (.not. lsvec) .and. ( nr== n ) ) then if (n>1_${ik}$) call stdlib${ii}$_zlaset( 'L', n-1,n-1, czero, czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_zgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,cwork, lwork, rwork, & lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) call stdlib${ii}$_zlapmr( .false., n, n, v, ldv, iwork ) else if ( lsvec .and. ( .not. rsvec ) ) then ! Singular Values And Left Singular Vectors ! Second Preconditioning Step To Avoid Need To Accumulate ! jacobi rotations in the jacobi iterations. do p = 1, nr call stdlib${ii}$_zcopy( n-p+1, a(p,p), lda, u(p,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( n-p+1, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_zgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr - 1 call stdlib${ii}$_zcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( n-p+1, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_zgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, cwork(n+1), lwork-& n, rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < m ) then call stdlib${ii}$_zlaset( 'A', m-nr, nr,czero, czero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_zlaset( 'A',nr, n1-nr, czero, czero, u(1_${ik}$,nr+1),ldu ) call stdlib${ii}$_zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu ) end if end if call stdlib${ii}$_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & ierr ) if ( rowpiv )call stdlib${ii}$_zlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) do p = 1, n1 xsc = one / stdlib${ii}$_dznrm2( m, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_zdscal( m, xsc, u(1_${ik}$,p), 1_${ik}$ ) end do if ( transp ) then call stdlib${ii}$_zlacpy( 'A', n, n, u, ldu, v, ldv ) end if else ! Full Svd if ( .not. jracc ) then if ( .not. almort ) then ! second preconditioning step (qrf [with pivoting]) ! note that the composition of transpose, qrf and transpose is ! equivalent to an lqf call. since in many libraries the qrf ! seems to be better optimized than the lqf, we do explicit ! transpose and use the qrf. this is subject to changes in an ! optimized implementation of stdlib${ii}$_zgejsv. do p = 1, nr call stdlib${ii}$_zcopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( n-p+1, v(p,p), 1_${ik}$ ) end do ! The Following Two Loops Perturb Small Entries To Avoid ! denormals in the second qr factorization, where they are ! as good as zeros. this is done to avoid painfully slow ! computation with denormals. the relative size of the perturbation ! is a parameter that can be changed by the implementer. ! this perturbation device will be obsolete on machines with ! properly implemented arithmetic. ! to switch it off, set l2pert=.false. to remove it from the ! code, remove the action under l2pert=.true., leave the else part. ! the following two loops should be blocked and fused with the ! transposed copy above. if ( l2pert ) then xsc = sqrt(small) do q = 1, nr ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=dp) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & ctemp ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) end if ! estimate the row scaled condition number of r1 ! (if r1 is rectangular, n > nr, then the condition number ! of the leading nr x nr submatrix is estimated.) call stdlib${ii}$_zlacpy( 'L', nr, nr, v, ldv, cwork(2_${ik}$*n+1), nr ) do p = 1, nr temp1 = stdlib${ii}$_dznrm2(nr-p+1,cwork(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) call stdlib${ii}$_zdscal(nr-p+1,one/temp1,cwork(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) end do call stdlib${ii}$_zpocon('L',nr,cwork(2_${ik}$*n+1),nr,one,temp1,cwork(2_${ik}$*n+nr*nr+1),rwork,& ierr) condr1 = one / sqrt(temp1) ! Here Need A Second Opinion On The Condition Number ! Then Assume Worst Case Scenario ! r1 is ok for inverse <=> condr1 < real(n,KIND=dp) ! more conservative <=> condr1 < sqrt(real(n,KIND=dp)) cond_ok = sqrt(sqrt(real(nr,KIND=dp))) ! [tp] cond_ok is a tuning parameter. if ( condr1 < cond_ok ) then ! .. the second qrf without pivoting. note: in an optimized ! implementation, this qrf should be implemented as the qrf ! of a lower triangular matrix. ! r1^* = q2 * r2 call stdlib${ii}$_zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small)/epsln do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=dp) if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) end do end do end if if ( nr /= n )call stdlib${ii}$_zlacpy( 'A', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) ! .. save ... ! This Transposed Copy Should Be Better Than Naive do p = 1, nr - 1 call stdlib${ii}$_zcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv(nr-p+1, v(p,p), 1_${ik}$ ) end do v(nr,nr)=conjg(v(nr,nr)) condr2 = condr1 else ! .. ill-conditioned case: second qrf with pivoting ! note that windowed pivoting would be equally good ! numerically, and more run-time efficient. so, in ! an optimal implementation, the next call to stdlib${ii}$_zgeqp3 ! should be replaced with eg. call zgeqpx (acm toms #782) ! with properly (carefully) chosen parameters. ! r1^* * p2 = q2 * r2 do p = 1, nr iwork(n+p) = 0_${ik}$ end do call stdlib${ii}$_zgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),cwork(2_${ik}$*n+1), lwork-& 2_${ik}$*n, rwork, ierr ) ! * call stdlib${ii}$_zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1), ! * $ lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=dp) if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) end do end do end if call stdlib${ii}$_zlacpy( 'A', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=dp) ! v(p,q) = - temp1*( v(q,p) / abs(v(q,p)) ) v(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'L',nr-1,nr-1,czero,czero,v(2_${ik}$,1_${ik}$),ldv ) end if ! now, compute r2 = l3 * q3, the lq factorization. call stdlib${ii}$_zgelqf( nr, nr, v, ldv, cwork(2_${ik}$*n+n*nr+1),cwork(2_${ik}$*n+n*nr+nr+1), & lwork-2*n-n*nr-nr, ierr ) ! And Estimate The Condition Number call stdlib${ii}$_zlacpy( 'L',nr,nr,v,ldv,cwork(2_${ik}$*n+n*nr+nr+1),nr ) do p = 1, nr temp1 = stdlib${ii}$_dznrm2( p, cwork(2_${ik}$*n+n*nr+nr+p), nr ) call stdlib${ii}$_zdscal( p, one/temp1, cwork(2_${ik}$*n+n*nr+nr+p), nr ) end do call stdlib${ii}$_zpocon( 'L',nr,cwork(2_${ik}$*n+n*nr+nr+1),nr,one,temp1,cwork(2_${ik}$*n+n*nr+& nr+nr*nr+1),rwork,ierr ) condr2 = one / sqrt(temp1) if ( condr2 >= cond_ok ) then ! Save The Householder Vectors Used For Q3 ! (this overwrites the copy of r2, as it will not be ! needed in this branch, but it does not overwritte the ! huseholder vectors of q2.). call stdlib${ii}$_zlacpy( 'U', nr, nr, v, ldv, cwork(2_${ik}$*n+1), n ) ! And The Rest Of The Information On Q3 Is In ! work(2*n+n*nr+1:2*n+n*nr+n) end if end if if ( l2pert ) then xsc = sqrt(small) do q = 2, nr ctemp = xsc * v(q,q) do p = 1, q - 1 ! v(p,q) = - temp1*( v(p,q) / abs(v(p,q)) ) v(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) end if ! second preconditioning finished; continue with jacobi svd ! the input matrix is lower trinagular. ! recover the right singular vectors as solution of a well ! conditioned triangular matrix equation. if ( condr1 < cond_ok ) then call stdlib${ii}$_zgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,cwork(2_${ik}$*n+n*nr+nr+1)& ,lwork-2*n-n*nr-nr,rwork,lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_zcopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_zdscal( nr, sva(p), v(1_${ik}$,p), 1_${ik}$ ) end do ! Pick The Right Matrix Equation And Solve It if ( nr == n ) then ! :)) .. best case, r1 is inverted. the solution of this matrix ! equation is q2*v2 = the product of the jacobi rotations ! used in stdlib${ii}$_zgesvj, premultiplied with the orthogonal matrix ! from the second qr factorization. call stdlib${ii}$_ztrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv) else ! .. r1 is well conditioned, but non-square. adjoint of r2 ! is inverted to get the product of the jacobi rotations ! used in stdlib${ii}$_zgesvj. the q-factor from the second qr ! factorization is then built in explicitly. call stdlib${ii}$_ztrsm('L','U','C','N',nr,nr,cone,cwork(2_${ik}$*n+1),n,v,ldv) if ( nr < n ) then call stdlib${ii}$_zlaset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_zlaset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_zunmqr('L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(& 2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) end if else if ( condr2 < cond_ok ) then ! the matrix r2 is inverted. the solution of the matrix equation ! is q3^* * v3 = the product of the jacobi rotations (appplied to ! the lower triangular l3 from the lq factorization of ! r2=l3*q3), pre-multiplied with the transposed q3. call stdlib${ii}$_zgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_zcopy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_zdscal( nr, sva(p), u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_ztrsm('L','U','N','N',nr,nr,cone,cwork(2_${ik}$*n+1),n,u,ldu) ! Apply The Permutation From The Second Qr Factorization do q = 1, nr do p = 1, nr cwork(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do end do if ( nr < n ) then call stdlib${ii}$_zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_zlaset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_zunmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) else ! last line of defense. ! #:( this is a rather pathological case: no scaled condition ! improvement after two pivoted qr factorizations. other ! possibility is that the rank revealing qr factorization ! or the condition estimator has failed, or the cond_ok ! is set very close to one (which is unnecessary). normally, ! this branch should never be executed, but in rare cases of ! failure of the rrqr or condition estimator, the last line of ! defense ensures that stdlib${ii}$_zgejsv completes the task. ! compute the full svd of l3 using stdlib${ii}$_zgesvj with explicit ! accumulation of jacobi rotations. call stdlib${ii}$_zgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_zlaset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_zunmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) call stdlib${ii}$_zunmlq( 'L', 'C', nr, nr, nr, cwork(2_${ik}$*n+1), n,cwork(2_${ik}$*n+n*nr+1), & u, ldu, cwork(2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) do q = 1, nr do p = 1, nr cwork(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do end do end if ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=dp)) * epsln do q = 1, n do p = 1, n cwork(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_dznrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_zdscal( n, xsc,& v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_zlaset('A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_zlaset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_zlaset('A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) end if end if ! the q matrix from the first qrf is built into the left singular ! matrix u. this applies to all cases. call stdlib${ii}$_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& n, ierr ) ! the columns of u are normalized. the cost is o(m*n) flops. temp1 = sqrt(real(m,KIND=dp)) * epsln do p = 1, nr xsc = one / stdlib${ii}$_dznrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_zdscal( m, xsc,& u(1_${ik}$,p), 1_${ik}$ ) end do ! if the initial qrf is computed with row pivoting, the left ! singular vectors must be adjusted. if ( rowpiv )call stdlib${ii}$_zlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) else ! The Initial Matrix A Has Almost Orthogonal Columns And ! the second qrf is not needed call stdlib${ii}$_zlacpy( 'U', n, n, a, lda, cwork(n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, n ctemp = xsc * cwork( n + (p-1)*n + p ) do q = 1, p - 1 ! cwork(n+(q-1)*n+p)=-temp1 * ( cwork(n+(p-1)*n+q) / ! $ abs(cwork(n+(p-1)*n+q)) ) cwork(n+(q-1)*n+p)=-ctemp end do end do else call stdlib${ii}$_zlaset( 'L',n-1,n-1,czero,czero,cwork(n+2),n ) end if call stdlib${ii}$_zgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,n, u, ldu, cwork(n+& n*n+1), lwork-n-n*n, rwork, lrwork,info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, n call stdlib${ii}$_zcopy( n, cwork(n+(p-1)*n+1), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_zdscal( n, sva(p), cwork(n+(p-1)*n+1), 1_${ik}$ ) end do call stdlib${ii}$_ztrsm( 'L', 'U', 'N', 'N', n, n,cone, a, lda, cwork(n+1), n ) do p = 1, n call stdlib${ii}$_zcopy( n, cwork(n+p), n, v(iwork(p),1_${ik}$), ldv ) end do temp1 = sqrt(real(n,KIND=dp))*epsln do p = 1, n xsc = one / stdlib${ii}$_dznrm2( n, v(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_zdscal( n, xsc,& v(1_${ik}$,p), 1_${ik}$ ) end do ! assemble the left singular vector matrix u (m x n). if ( n < m ) then call stdlib${ii}$_zlaset( 'A', m-n, n, czero, czero, u(n+1,1_${ik}$), ldu ) if ( n < n1 ) then call stdlib${ii}$_zlaset('A',n, n1-n, czero, czero, u(1_${ik}$,n+1),ldu) call stdlib${ii}$_zlaset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu) end if end if call stdlib${ii}$_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& n, ierr ) temp1 = sqrt(real(m,KIND=dp))*epsln do p = 1, n1 xsc = one / stdlib${ii}$_dznrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_zdscal( m, xsc,& u(1_${ik}$,p), 1_${ik}$ ) end do if ( rowpiv )call stdlib${ii}$_zlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) end if ! end of the >> almost orthogonal case << in the full svd else ! this branch deploys a preconditioned jacobi svd with explicitly ! accumulated rotations. it is included as optional, mainly for ! experimental purposes. it does perform well, and can also be used. ! in this implementation, this branch will be automatically activated ! if the condition number sigma_max(a) / sigma_min(a) is predicted ! to be greater than the overflow threshold. this is because the ! a posteriori computation of the singular vectors assumes robust ! implementation of blas and some lapack procedures, capable of working ! in presence of extreme values, e.g. when the singular values spread from ! the underflow to the overflow threshold. do p = 1, nr call stdlib${ii}$_zcopy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( n-p+1, v(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 1, nr ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=dp) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & ctemp ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset( 'U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) end if call stdlib${ii}$_zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) call stdlib${ii}$_zlacpy( 'L', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) do p = 1, nr call stdlib${ii}$_zcopy( nr-p+1, v(p,p), ldv, u(p,p), 1_${ik}$ ) call stdlib${ii}$_zlacgv( nr-p+1, u(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 2, nr do p = 1, q - 1 ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),zero,KIND=dp) ! u(p,q) = - temp1 * ( u(q,p) / abs(u(q,p)) ) u(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_zlaset('U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) end if call stdlib${ii}$_zgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, cwork(2_${ik}$*n+n*nr+1),& lwork-2*n-n*nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_zlaset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_zlaset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_zunmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+n*nr+& nr+1),lwork-2*n-n*nr-nr,ierr ) ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=dp)) * epsln do q = 1, n do p = 1, n cwork(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_dznrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_zdscal( n, xsc,& v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_zlaset( 'A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_zlaset('A',nr, n1-nr, czero, czero, u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_zlaset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu) end if end if call stdlib${ii}$_zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & ierr ) if ( rowpiv )call stdlib${ii}$_zlaswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) end if if ( transp ) then ! .. swap u and v because the procedure worked on a^* do p = 1, n call stdlib${ii}$_zswap( n, u(1_${ik}$,p), 1_${ik}$, v(1_${ik}$,p), 1_${ik}$ ) end do end if end if ! end of the full svd ! undo scaling, if necessary (and possible) if ( uscal2 <= (big/sva(1_${ik}$))*uscal1 ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, uscal1, uscal2, nr, 1_${ik}$, sva, n, ierr ) uscal1 = one uscal2 = one end if if ( nr < n ) then do p = nr+1, n sva(p) = zero end do end if rwork(1_${ik}$) = uscal2 * scalem rwork(2_${ik}$) = uscal1 if ( errest ) rwork(3_${ik}$) = sconda if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = condr1 rwork(5_${ik}$) = condr2 end if if ( l2tran ) then rwork(6_${ik}$) = entra rwork(7_${ik}$) = entrat end if iwork(1_${ik}$) = nr iwork(2_${ik}$) = numrank iwork(3_${ik}$) = warning if ( transp ) then iwork(4_${ik}$) = 1_${ik}$ else iwork(4_${ik}$) = -1_${ik}$ end if return end subroutine stdlib${ii}$_zgejsv #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gejsv( joba, jobu, jobv, jobr, jobt, jobp,m, n, a, lda, sva, u, ldu, & !! ZGEJSV: computes the singular value decomposition (SVD) of a complex M-by-N !! matrix [A], where M >= N. The SVD of [A] is written as !! [A] = [U] * [SIGMA] * [V]^*, !! where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N !! diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and !! [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are !! the singular values of [A]. The columns of [U] and [V] are the left and !! the right singular vectors of [A], respectively. The matrices [U] and [V] !! are computed and stored in the arrays U and V, respectively. The diagonal !! of [SIGMA] is computed and stored in the array SVA. v, ldv,cwork, lwork, rwork, lrwork, iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldu, ldv, lwork, lrwork, m, n ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: u(ldu,*), v(ldv,*), cwork(lwork) real(${ck}$), intent(out) :: sva(n), rwork(lrwork) integer(${ik}$), intent(out) :: iwork(*) character, intent(in) :: joba, jobp, jobr, jobt, jobu, jobv ! =========================================================================== ! Local Scalars complex(${ck}$) :: ctemp real(${ck}$) :: aapp, aaqq, aatmax, aatmin, big, big1, cond_ok, condr1, condr2, entra, & entrat, epsln, maxprj, scalem, sconda, sfmin, small, temp1, uscal1, uscal2, xsc integer(${ik}$) :: ierr, n1, nr, numrank, p, q, warning logical(lk) :: almort, defr, errest, goscal, jracc, kill, lquery, lsvec, l2aber, & l2kill, l2pert, l2rank, l2tran, noscal, rowpiv, rsvec, transp integer(${ik}$) :: optwrk, minwrk, minrwrk, miniwrk integer(${ik}$) :: lwcon, lwlqf, lwqp3, lwqrf, lwunmlq, lwunmqr, lwunmqrm, lwsvdj, & lwsvdjv, lrwqp3, lrwcon, lrwsvdj, iwoff integer(${ik}$) :: lwrk_wgelqf, lwrk_wgeqp3, lwrk_wgeqp3n, lwrk_wgeqrf, lwrk_wgesvj, & lwrk_wgesvjv, lwrk_wgesvju, lwrk_wunmlq, lwrk_wunmqr, lwrk_wunmqrm ! Local Arrays complex(${ck}$) :: cdummy(1_${ik}$) real(${ck}$) :: rdummy(1_${ik}$) ! Intrinsic Functions ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) jracc = stdlib_lsame( jobv, 'J' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. jracc rowpiv = stdlib_lsame( joba, 'F' ) .or. stdlib_lsame( joba, 'G' ) l2rank = stdlib_lsame( joba, 'R' ) l2aber = stdlib_lsame( joba, 'A' ) errest = stdlib_lsame( joba, 'E' ) .or. stdlib_lsame( joba, 'G' ) l2tran = stdlib_lsame( jobt, 'T' ) .and. ( m == n ) l2kill = stdlib_lsame( jobr, 'R' ) defr = stdlib_lsame( jobr, 'N' ) l2pert = stdlib_lsame( jobp, 'P' ) lquery = ( lwork == -1_${ik}$ ) .or. ( lrwork == -1_${ik}$ ) if ( .not.(rowpiv .or. l2rank .or. l2aber .or.errest .or. stdlib_lsame( joba, 'C' ) )) & then info = - 1_${ik}$ else if ( .not.( lsvec .or. stdlib_lsame( jobu, 'N' ) .or.( stdlib_lsame( jobu, 'W' ) & .and. rsvec .and. l2tran ) ) ) then info = - 2_${ik}$ else if ( .not.( rsvec .or. stdlib_lsame( jobv, 'N' ) .or.( stdlib_lsame( jobv, 'W' ) & .and. lsvec .and. l2tran ) ) ) then info = - 3_${ik}$ else if ( .not. ( l2kill .or. defr ) ) then info = - 4_${ik}$ else if ( .not. ( stdlib_lsame(jobt,'T') .or. stdlib_lsame(jobt,'N') ) ) then info = - 5_${ik}$ else if ( .not. ( l2pert .or. stdlib_lsame( jobp, 'N' ) ) ) then info = - 6_${ik}$ else if ( m < 0_${ik}$ ) then info = - 7_${ik}$ else if ( ( n < 0_${ik}$ ) .or. ( n > m ) ) then info = - 8_${ik}$ else if ( lda < m ) then info = - 10_${ik}$ else if ( lsvec .and. ( ldu < m ) ) then info = - 13_${ik}$ else if ( rsvec .and. ( ldv < n ) ) then info = - 15_${ik}$ else ! #:) info = 0_${ik}$ end if if ( info == 0_${ik}$ ) then ! Compute The Minimal And The Optimal Workspace Lengths ! [[the expressions for computing the minimal and the optimal ! values of lcwork, lrwork are written with a lot of redundancy and ! can be simplified. however, this verbose form is useful for ! maintenance and modifications of the code.]] ! .. minimal workspace length for stdlib${ii}$_${ci}$geqp3 of an m x n matrix, ! stdlib${ii}$_${ci}$geqrf of an n x n matrix, stdlib${ii}$_${ci}$gelqf of an n x n matrix, ! stdlib${ii}$_${ci}$unmlq for computing n x n matrix, stdlib${ii}$_${ci}$unmqr for computing n x n ! matrix, stdlib${ii}$_${ci}$unmqr for computing m x n matrix, respectively. lwqp3 = n+1 lwqrf = max( 1_${ik}$, n ) lwlqf = max( 1_${ik}$, n ) lwunmlq = max( 1_${ik}$, n ) lwunmqr = max( 1_${ik}$, n ) lwunmqrm = max( 1_${ik}$, m ) ! Minimal Workspace Length For Stdlib_Zpocon Of An N X N Matrix lwcon = 2_${ik}$ * n ! .. minimal workspace length for stdlib${ii}$_${ci}$gesvj of an n x n matrix, ! without and with explicit accumulation of jacobi rotations lwsvdj = max( 2_${ik}$ * n, 1_${ik}$ ) lwsvdjv = max( 2_${ik}$ * n, 1_${ik}$ ) ! .. minimal real workspace length for stdlib${ii}$_${ci}$geqp3, stdlib${ii}$_${ci}$pocon, stdlib${ii}$_${ci}$gesvj lrwqp3 = 2_${ik}$ * n lrwcon = n lrwsvdj = n if ( lquery ) then call stdlib${ii}$_${ci}$geqp3( m, n, a, lda, iwork, cdummy, cdummy, -1_${ik}$,rdummy, ierr ) lwrk_wgeqp3 = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$geqrf( n, n, a, lda, cdummy, cdummy,-1_${ik}$, ierr ) lwrk_wgeqrf = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$gelqf( n, n, a, lda, cdummy, cdummy,-1_${ik}$, ierr ) lwrk_wgelqf = real( cdummy(1_${ik}$),KIND=${ck}$) end if minwrk = 2_${ik}$ optwrk = 2_${ik}$ miniwrk = n if ( .not. (lsvec .or. rsvec ) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If ! only the singular values are requested if ( errest ) then minwrk = max( n+lwqp3, n**2_${ik}$+lwcon, n+lwqrf, lwsvdj ) else minwrk = max( n+lwqp3, n+lwqrf, lwsvdj ) end if if ( lquery ) then call stdlib${ii}$_${ci}$gesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,ldv, cdummy, -1_${ik}$,& rdummy, -1_${ik}$, ierr ) lwrk_wgesvj = real( cdummy(1_${ik}$),KIND=${ck}$) if ( errest ) then optwrk = max( n+lwrk_wgeqp3, n**2_${ik}$+lwcon,n+lwrk_wgeqrf, lwrk_wgesvj ) else optwrk = max( n+lwrk_wgeqp3, n+lwrk_wgeqrf,lwrk_wgesvj ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwcon, lrwsvdj ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwcon, lrwsvdj ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( rsvec .and. (.not.lsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the right singular vectors are requested if ( errest ) then minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,2_${ik}$*n+lwqrf, n+lwsvdj, n+& lwunmlq ) else minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2_${ik}$*n+lwqrf,n+lwsvdj, n+lwunmlq ) end if if ( lquery ) then call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1_${ik}$, & rdummy, -1_${ik}$, ierr ) lwrk_wgesvj = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -1_${ik}$, & ierr ) lwrk_wunmlq = real( cdummy(1_${ik}$),KIND=${ck}$) if ( errest ) then optwrk = max( n+lwrk_wgeqp3, lwcon, lwrk_wgesvj,n+lwrk_wgelqf, 2_${ik}$*n+& lwrk_wgeqrf,n+lwrk_wgesvj, n+lwrk_wunmlq ) else optwrk = max( n+lwrk_wgeqp3, lwrk_wgesvj,n+lwrk_wgelqf,2_${ik}$*n+lwrk_wgeqrf, n+& lwrk_wgesvj,n+lwrk_wunmlq ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( lsvec .and. (.not.rsvec) ) then ! Minimal And Optimal Sizes Of The Complex Workspace If The ! singular values and the left singular vectors are requested if ( errest ) then minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm ) else minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm ) end if if ( lquery ) then call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,lda, cdummy, -1_${ik}$, & rdummy, -1_${ik}$, ierr ) lwrk_wgesvj = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_wunmqrm = real( cdummy(1_${ik}$),KIND=${ck}$) if ( errest ) then optwrk = n + max( lwrk_wgeqp3, lwcon, n+lwrk_wgeqrf,lwrk_wgesvj, & lwrk_wunmqrm ) else optwrk = n + max( lwrk_wgeqp3, n+lwrk_wgeqrf,lwrk_wgesvj, lwrk_wunmqrm ) end if end if if ( l2tran .or. rowpiv ) then if ( errest ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj ) end if else if ( errest ) then minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj ) end if end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else ! Minimal And Optimal Sizes Of The Complex Workspace If The ! full svd is requested if ( .not. jracc ) then if ( errest ) then minwrk = max( n+lwqp3, n+lwcon, 2_${ik}$*n+n**2_${ik}$+lwcon,2_${ik}$*n+lwqrf, 2_${ik}$*n+& lwqp3,2_${ik}$*n+n**2_${ik}$+n+lwlqf, 2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+lwsvdj, 2_${ik}$*n+& n**2_${ik}$+n+lwsvdjv,2_${ik}$*n+n**2_${ik}$+n+lwunmqr,2_${ik}$*n+n**2_${ik}$+n+lwunmlq,n+n**2_${ik}$+lwsvdj, n+& lwunmqrm ) else minwrk = max( n+lwqp3, 2_${ik}$*n+n**2_${ik}$+lwcon,2_${ik}$*n+lwqrf, 2_${ik}$*n+& lwqp3,2_${ik}$*n+n**2_${ik}$+n+lwlqf, 2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+lwsvdj, 2_${ik}$*n+& n**2_${ik}$+n+lwsvdjv,2_${ik}$*n+n**2_${ik}$+n+lwunmqr,2_${ik}$*n+n**2_${ik}$+n+lwunmlq,n+n**2_${ik}$+lwsvdj, & n+lwunmqrm ) end if miniwrk = miniwrk + n if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m else if ( errest ) then minwrk = max( n+lwqp3, n+lwcon, 2_${ik}$*n+lwqrf,2_${ik}$*n+n**2_${ik}$+lwsvdjv, 2_${ik}$*n+n**2_${ik}$+n+& lwunmqr,n+lwunmqrm ) else minwrk = max( n+lwqp3, 2_${ik}$*n+lwqrf,2_${ik}$*n+n**2_${ik}$+lwsvdjv, 2_${ik}$*n+n**2_${ik}$+n+lwunmqr,n+& lwunmqrm ) end if if ( rowpiv .or. l2tran ) miniwrk = miniwrk + m end if if ( lquery ) then call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_wunmqrm = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$unmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,ldu, cdummy, -1_${ik}$, & ierr ) lwrk_wunmqr = real( cdummy(1_${ik}$),KIND=${ck}$) if ( .not. jracc ) then call stdlib${ii}$_${ci}$geqp3( n,n, a, lda, iwork, cdummy,cdummy, -1_${ik}$,rdummy, ierr ) lwrk_wgeqp3n = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_wgesvj = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$gesvj( 'U', 'U', 'N', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_wgesvju = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_wgesvjv = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, n, n, a, lda, cdummy,v, ldv, cdummy, -& 1_${ik}$, ierr ) lwrk_wunmlq = real( cdummy(1_${ik}$),KIND=${ck}$) if ( errest ) then optwrk = max( n+lwrk_wgeqp3, n+lwcon,2_${ik}$*n+n**2_${ik}$+lwcon, 2_${ik}$*n+lwrk_wgeqrf,& 2_${ik}$*n+lwrk_wgeqp3n,2_${ik}$*n+n**2_${ik}$+n+lwrk_wgelqf,2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+& n**2_${ik}$+n+lwrk_wgesvj,2_${ik}$*n+n**2_${ik}$+n+lwrk_wgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_wunmqr,2_${ik}$*n+& n**2_${ik}$+n+lwrk_wunmlq,n+n**2_${ik}$+lwrk_wgesvju,n+lwrk_wunmqrm ) else optwrk = max( n+lwrk_wgeqp3,2_${ik}$*n+n**2_${ik}$+lwcon, 2_${ik}$*n+lwrk_wgeqrf,2_${ik}$*n+& lwrk_wgeqp3n,2_${ik}$*n+n**2_${ik}$+n+lwrk_wgelqf,2_${ik}$*n+n**2_${ik}$+n+n**2_${ik}$+lwcon,2_${ik}$*n+n**2_${ik}$+n+& lwrk_wgesvj,2_${ik}$*n+n**2_${ik}$+n+lwrk_wgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_wunmqr,2_${ik}$*n+n**2_${ik}$+n+& lwrk_wunmlq,n+n**2_${ik}$+lwrk_wgesvju,n+lwrk_wunmqrm ) end if else call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'V', n, n, u, ldu, sva,n, v, ldv, cdummy, & -1_${ik}$, rdummy, -1_${ik}$, ierr ) lwrk_wgesvjv = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$unmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,v, ldv, cdummy,& -1_${ik}$, ierr ) lwrk_wunmqr = real( cdummy(1_${ik}$),KIND=${ck}$) call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,ldu, cdummy, -& 1_${ik}$, ierr ) lwrk_wunmqrm = real( cdummy(1_${ik}$),KIND=${ck}$) if ( errest ) then optwrk = max( n+lwrk_wgeqp3, n+lwcon,2_${ik}$*n+lwrk_wgeqrf, 2_${ik}$*n+n**2_${ik}$,2_${ik}$*n+& n**2_${ik}$+lwrk_wgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_wunmqr,n+lwrk_wunmqrm ) else optwrk = max( n+lwrk_wgeqp3, 2_${ik}$*n+lwrk_wgeqrf,2_${ik}$*n+n**2_${ik}$, 2_${ik}$*n+n**2_${ik}$+& lwrk_wgesvjv,2_${ik}$*n+n**2_${ik}$+n+lwrk_wunmqr,n+lwrk_wunmqrm ) end if end if end if if ( l2tran .or. rowpiv ) then minrwrk = max( 7_${ik}$, 2_${ik}$*m, lrwqp3, lrwsvdj, lrwcon ) else minrwrk = max( 7_${ik}$, lrwqp3, lrwsvdj, lrwcon ) end if end if minwrk = max( 2_${ik}$, minwrk ) optwrk = max( minwrk, optwrk ) if ( lwork < minwrk .and. (.not.lquery) ) info = - 17_${ik}$ if ( lrwork < minrwrk .and. (.not.lquery) ) info = - 19_${ik}$ end if if ( info /= 0_${ik}$ ) then ! #:( call stdlib${ii}$_xerbla( 'ZGEJSV', - info ) return else if ( lquery ) then cwork(1_${ik}$) = optwrk cwork(2_${ik}$) = minwrk rwork(1_${ik}$) = minrwrk iwork(1_${ik}$) = max( 4_${ik}$, miniwrk ) return end if ! quick return for void matrix (y3k safe) ! #:) if ( ( m == 0_${ik}$ ) .or. ( n == 0_${ik}$ ) ) then iwork(1_${ik}$:4_${ik}$) = 0_${ik}$ rwork(1_${ik}$:7_${ik}$) = 0_${ik}$ return endif ! determine whether the matrix u should be m x n or m x m if ( lsvec ) then n1 = n if ( stdlib_lsame( jobu, 'F' ) ) n1 = m end if ! set numerical parameters ! ! note: make sure stdlib${ii}$_${c2ri(ci)}$lamch() does not fail on the target architecture. epsln = stdlib${ii}$_${c2ri(ci)}$lamch('EPSILON') sfmin = stdlib${ii}$_${c2ri(ci)}$lamch('SAFEMINIMUM') small = sfmin / epsln big = stdlib${ii}$_${c2ri(ci)}$lamch('O') ! big = one / sfmin ! initialize sva(1:n) = diag( ||a e_i||_2 )_1^n ! (!) if necessary, scale sva() to protect the largest norm from ! overflow. it is possible that this scaling pushes the smallest ! column norm left from the underflow threshold (extreme case). scalem = one / sqrt(real(m,KIND=${ck}$)*real(n,KIND=${ck}$)) noscal = .true. goscal = .true. do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a(1_${ik}$,p), 1_${ik}$, aapp, aaqq ) if ( aapp > big ) then info = - 9_${ik}$ call stdlib${ii}$_xerbla( 'ZGEJSV', -info ) return end if aaqq = sqrt(aaqq) if ( ( aapp < (big / aaqq) ) .and. noscal ) then sva(p) = aapp * aaqq else noscal = .false. sva(p) = aapp * ( aaqq * scalem ) if ( goscal ) then goscal = .false. call stdlib${ii}$_${c2ri(ci)}$scal( p-1, scalem, sva, 1_${ik}$ ) end if end if end do if ( noscal ) scalem = one aapp = zero aaqq = big do p = 1, n aapp = max( aapp, sva(p) ) if ( sva(p) /= zero ) aaqq = min( aaqq, sva(p) ) end do ! quick return for zero m x n matrix ! #:) if ( aapp == zero ) then if ( lsvec ) call stdlib${ii}$_${ci}$laset( 'G', m, n1, czero, cone, u, ldu ) if ( rsvec ) call stdlib${ii}$_${ci}$laset( 'G', n, n, czero, cone, v, ldv ) rwork(1_${ik}$) = one rwork(2_${ik}$) = one if ( errest ) rwork(3_${ik}$) = one if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = one rwork(5_${ik}$) = one end if if ( l2tran ) then rwork(6_${ik}$) = zero rwork(7_${ik}$) = zero end if iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ iwork(3_${ik}$) = 0_${ik}$ iwork(4_${ik}$) = -1_${ik}$ return end if ! issue warning if denormalized column norms detected. override the ! high relative accuracy request. issue licence to kill nonzero columns ! (set them to zero) whose norm is less than sigma_max / big (roughly). ! #:( warning = 0_${ik}$ if ( aaqq <= sfmin ) then l2rank = .true. l2kill = .true. warning = 1_${ik}$ end if ! quick return for one-column matrix ! #:) if ( n == 1_${ik}$ ) then if ( lsvec ) then call stdlib${ii}$_${ci}$lascl( 'G',0_${ik}$,0_${ik}$,sva(1_${ik}$),scalem, m,1_${ik}$,a(1_${ik}$,1_${ik}$),lda,ierr ) call stdlib${ii}$_${ci}$lacpy( 'A', m, 1_${ik}$, a, lda, u, ldu ) ! computing all m left singular vectors of the m x 1 matrix if ( n1 /= n ) then call stdlib${ii}$_${ci}$geqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr ) call stdlib${ii}$_${ci}$ungqr( m,n1,1_${ik}$, u,ldu,cwork,cwork(n+1),lwork-n,ierr ) call stdlib${ii}$_${ci}$copy( m, a(1_${ik}$,1_${ik}$), 1_${ik}$, u(1_${ik}$,1_${ik}$), 1_${ik}$ ) end if end if if ( rsvec ) then v(1_${ik}$,1_${ik}$) = cone end if if ( sva(1_${ik}$) < (big*scalem) ) then sva(1_${ik}$) = sva(1_${ik}$) / scalem scalem = one end if rwork(1_${ik}$) = one / scalem rwork(2_${ik}$) = one if ( sva(1_${ik}$) /= zero ) then iwork(1_${ik}$) = 1_${ik}$ if ( ( sva(1_${ik}$) / scalem) >= sfmin ) then iwork(2_${ik}$) = 1_${ik}$ else iwork(2_${ik}$) = 0_${ik}$ end if else iwork(1_${ik}$) = 0_${ik}$ iwork(2_${ik}$) = 0_${ik}$ end if iwork(3_${ik}$) = 0_${ik}$ iwork(4_${ik}$) = -1_${ik}$ if ( errest ) rwork(3_${ik}$) = one if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = one rwork(5_${ik}$) = one end if if ( l2tran ) then rwork(6_${ik}$) = zero rwork(7_${ik}$) = zero end if return end if transp = .false. aatmax = -one aatmin = big if ( rowpiv .or. l2tran ) then ! compute the row norms, needed to determine row pivoting sequence ! (in the case of heavily row weighted a, row pivoting is strongly ! advised) and to collect information needed to compare the ! structures of a * a^* and a^* * a (in the case l2tran==.true.). if ( l2tran ) then do p = 1, m xsc = zero temp1 = one call stdlib${ii}$_${ci}$lassq( n, a(p,1_${ik}$), lda, xsc, temp1 ) ! stdlib${ii}$_${ci}$lassq gets both the ell_2 and the ell_infinity norm ! in one pass through the vector rwork(m+p) = xsc * scalem rwork(p) = xsc * (scalem*sqrt(temp1)) aatmax = max( aatmax, rwork(p) ) if (rwork(p) /= zero)aatmin = min(aatmin,rwork(p)) end do else do p = 1, m rwork(m+p) = scalem*abs( a(p,stdlib${ii}$_i${ci}$amax(n,a(p,1_${ik}$),lda)) ) aatmax = max( aatmax, rwork(m+p) ) aatmin = min( aatmin, rwork(m+p) ) end do end if end if ! for square matrix a try to determine whether a^* would be better ! input for the preconditioned jacobi svd, with faster convergence. ! the decision is based on an o(n) function of the vector of column ! and row norms of a, based on the shannon entropy. this should give ! the right choice in most cases when the difference actually matters. ! it may fail and pick the slower converging side. entra = zero entrat = zero if ( l2tran ) then xsc = zero temp1 = one call stdlib${ii}$_${c2ri(ci)}$lassq( n, sva, 1_${ik}$, xsc, temp1 ) temp1 = one / temp1 entra = zero do p = 1, n big1 = ( ( sva(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entra = entra + big1 * log(big1) end do entra = - entra / log(real(n,KIND=${ck}$)) ! now, sva().^2/trace(a^* * a) is a point in the probability simplex. ! it is derived from the diagonal of a^* * a. do the same with the ! diagonal of a * a^*, compute the entropy of the corresponding ! probability distribution. note that a * a^* and a^* * a have the ! same trace. entrat = zero do p = 1, m big1 = ( ( rwork(p) / xsc )**2_${ik}$ ) * temp1 if ( big1 /= zero ) entrat = entrat + big1 * log(big1) end do entrat = - entrat / log(real(m,KIND=${ck}$)) ! analyze the entropies and decide a or a^*. smaller entropy ! usually means better input for the algorithm. transp = ( entrat < entra ) ! if a^* is better than a, take the adjoint of a. this is allowed ! only for square matrices, m=n. if ( transp ) then ! in an optimal implementation, this trivial transpose ! should be replaced with faster transpose. do p = 1, n - 1 a(p,p) = conjg(a(p,p)) do q = p + 1, n ctemp = conjg(a(q,p)) a(q,p) = conjg(a(p,q)) a(p,q) = ctemp end do end do a(n,n) = conjg(a(n,n)) do p = 1, n rwork(m+p) = sva(p) sva(p) = rwork(p) ! previously computed row 2-norms are now column 2-norms ! of the transposed matrix end do temp1 = aapp aapp = aatmax aatmax = temp1 temp1 = aaqq aaqq = aatmin aatmin = temp1 kill = lsvec lsvec = rsvec rsvec = kill if ( lsvec ) n1 = n rowpiv = .true. end if end if ! end if l2tran ! scale the matrix so that its maximal singular value remains less ! than sqrt(big) -- the matrix is scaled so that its maximal column ! has euclidean norm equal to sqrt(big/n). the only reason to keep ! sqrt(big) instead of big is the fact that stdlib${ii}$_${ci}$gejsv uses lapack and ! blas routines that, in some implementations, are not capable of ! working in the full interval [sfmin,big] and that they may provoke ! overflows in the intermediate results. if the singular values spread ! from sfmin to big, then stdlib${ii}$_${ci}$gesvj will compute them. so, in that case, ! one should use stdlib_${ci}$gesvj instead of stdlib${ii}$_${ci}$gejsv. ! >> change in the april 2016 update: allow bigger range, i.e. the ! largest column is allowed up to big/n and stdlib${ii}$_${ci}$gesvj will do the rest. big1 = sqrt( big ) temp1 = sqrt( big / real(n,KIND=${ck}$) ) ! temp1 = big/real(n,KIND=${ck}$) call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, n, 1_${ik}$, sva, n, ierr ) if ( aaqq > (aapp * sfmin) ) then aaqq = ( aaqq / aapp ) * temp1 else aaqq = ( aaqq * temp1 ) / aapp end if temp1 = temp1 * scalem call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, temp1, m, n, a, lda, ierr ) ! to undo scaling at the end of this procedure, multiply the ! computed singular values with uscal2 / uscal1. uscal1 = temp1 uscal2 = aapp if ( l2kill ) then ! l2kill enforces computation of nonzero singular values in ! the restricted range of condition number of the initial a, ! sigma_max(a) / sigma_min(a) approx. sqrt(big)/sqrt(sfmin). xsc = sqrt( sfmin ) else xsc = small ! now, if the condition number of a is too big, ! sigma_max(a) / sigma_min(a) > sqrt(big/n) * epsln / sfmin, ! as a precaution measure, the full svd is computed using stdlib${ii}$_${ci}$gesvj ! with accumulated jacobi rotations. this provides numerically ! more robust computation, at the cost of slightly increased run ! time. depending on the concrete implementation of blas and lapack ! (i.e. how they behave in presence of extreme ill-conditioning) the ! implementor may decide to remove this switch. if ( ( aaqq<sqrt(sfmin) ) .and. lsvec .and. rsvec ) then jracc = .true. end if end if if ( aaqq < xsc ) then do p = 1, n if ( sva(p) < xsc ) then call stdlib${ii}$_${ci}$laset( 'A', m, 1_${ik}$, czero, czero, a(1_${ik}$,p), lda ) sva(p) = zero end if end do end if ! preconditioning using qr factorization with pivoting if ( rowpiv ) then ! optional row permutation (bjoerck row pivoting): ! a result by cox and higham shows that the bjoerck's ! row pivoting combined with standard column pivoting ! has similar effect as powell-reid complete pivoting. ! the ell-infinity norms of a are made nonincreasing. if ( ( lsvec .and. rsvec ) .and. .not.( jracc ) ) then iwoff = 2_${ik}$*n else iwoff = n end if do p = 1, m - 1 q = stdlib${ii}$_i${c2ri(ci)}$amax( m-p+1, rwork(m+p), 1_${ik}$ ) + p - 1_${ik}$ iwork(iwoff+p) = q if ( p /= q ) then temp1 = rwork(m+p) rwork(m+p) = rwork(m+q) rwork(m+q) = temp1 end if end do call stdlib${ii}$_${ci}$laswp( n, a, lda, 1_${ik}$, m-1, iwork(iwoff+1), 1_${ik}$ ) end if ! end of the preparation phase (scaling, optional sorting and ! transposing, optional flushing of small columns). ! preconditioning ! if the full svd is needed, the right singular vectors are computed ! from a matrix equation, and for that we need theoretical analysis ! of the businger-golub pivoting. so we use stdlib_${ci}$geqp3 as the first rr qrf. ! in all other cases the first rr qrf can be chosen by other criteria ! (eg speed by replacing global with restricted window pivoting, such ! as in xgeqpx from toms # 782). good results will be obtained using ! xgeqpx with properly (!) chosen numerical parameters. ! any improvement of stdlib${ii}$_${ci}$geqp3 improves overall performance of stdlib${ii}$_${ci}$gejsv. ! a * p1 = q1 * [ r1^* 0]^*: do p = 1, n ! All Columns Are Free Columns iwork(p) = 0_${ik}$ end do call stdlib${ii}$_${ci}$geqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,rwork, ierr ) ! the upper triangular matrix r1 from the first qrf is inspected for ! rank deficiency and possibilities for deflation, or possible ! ill-conditioning. depending on the user specified flag l2rank, ! the procedure explores possibilities to reduce the numerical ! rank by inspecting the computed upper triangular factor. if ! l2rank or l2aber are up, then stdlib${ii}$_${ci}$gejsv will compute the svd of ! a + da, where ||da|| <= f(m,n)*epsln. nr = 1_${ik}$ if ( l2aber ) then ! standard absolute error bound suffices. all sigma_i with ! sigma_i < n*epsln*||a|| are flushed to zero. this is an ! aggressive enforcement of lower numerical rank by introducing a ! backward error of the order of n*epsln*||a||. temp1 = sqrt(real(n,KIND=${ck}$))*epsln loop_3002: do p = 2, n if ( abs(a(p,p)) >= (temp1*abs(a(1_${ik}$,1_${ik}$))) ) then nr = nr + 1_${ik}$ else exit loop_3002 end if end do loop_3002 else if ( l2rank ) then ! .. similarly as above, only slightly more gentle (less aggressive). ! sudden drop on the diagonal of r1 is used as the criterion for ! close-to-rank-deficient. temp1 = sqrt(sfmin) loop_3402: do p = 2, n if ( ( abs(a(p,p)) < (epsln*abs(a(p-1,p-1))) ) .or.( abs(a(p,p)) < small ) .or.( & l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3402 nr = nr + 1_${ik}$ end do loop_3402 else ! the goal is high relative accuracy. however, if the matrix ! has high scaled condition number the relative accuracy is in ! general not feasible. later on, a condition number estimator ! will be deployed to estimate the scaled condition number. ! here we just remove the underflowed part of the triangular ! factor. this prevents the situation in which the code is ! working hard to get the accuracy not warranted by the data. temp1 = sqrt(sfmin) loop_3302: do p = 2, n if ( ( abs(a(p,p)) < small ) .or.( l2kill .and. (abs(a(p,p)) < temp1) ) ) exit loop_3302 nr = nr + 1_${ik}$ end do loop_3302 end if almort = .false. if ( nr == n ) then maxprj = one do p = 2, n temp1 = abs(a(p,p)) / sva(iwork(p)) maxprj = min( maxprj, temp1 ) end do if ( maxprj**2_${ik}$ >= one - real(n,KIND=${ck}$)*epsln ) almort = .true. end if sconda = - one condr1 = - one condr2 = - one if ( errest ) then if ( n == nr ) then if ( rsvec ) then ! V Is Available As Workspace call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, v, ldv ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_${ci}$dscal( p, one/temp1, v(1_${ik}$,p), 1_${ik}$ ) end do if ( lsvec )then call stdlib${ii}$_${ci}$pocon( 'U', n, v, ldv, one, temp1,cwork(n+1), rwork, ierr ) else call stdlib${ii}$_${ci}$pocon( 'U', n, v, ldv, one, temp1,cwork, rwork, ierr ) end if else if ( lsvec ) then ! U Is Available As Workspace call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, u, ldu ) do p = 1, n temp1 = sva(iwork(p)) call stdlib${ii}$_${ci}$dscal( p, one/temp1, u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_${ci}$pocon( 'U', n, u, ldu, one, temp1,cwork(n+1), rwork, ierr ) else call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, cwork, n ) ! [] call stdlib${ii}$_${ci}$lacpy( 'u', n, n, a, lda, cwork(n+1), n ) ! change: here index shifted by n to the left, cwork(1:n) ! not needed for sigma only computation do p = 1, n temp1 = sva(iwork(p)) ! [] call stdlib${ii}$_${ci}$dscal( p, one/temp1, cwork(n+(p-1)*n+1), 1 ) call stdlib${ii}$_${ci}$dscal( p, one/temp1, cwork((p-1)*n+1), 1_${ik}$ ) end do ! The Columns Of R Are Scaled To Have Unit Euclidean Lengths ! [] call stdlib${ii}$_${ci}$pocon( 'u', n, cwork(n+1), n, one, temp1, ! [] $ cwork(n+n*n+1), rwork, ierr ) call stdlib${ii}$_${ci}$pocon( 'U', n, cwork, n, one, temp1,cwork(n*n+1), rwork, ierr ) end if if ( temp1 /= zero ) then sconda = one / sqrt(temp1) else sconda = - one end if ! sconda is an estimate of sqrt(||(r^* * r)^(-1)||_1). ! n^(-1/4) * sconda <= ||r^(-1)||_2 <= n^(1/4) * sconda else sconda = - one end if end if l2pert = l2pert .and. ( abs( a(1_${ik}$,1_${ik}$)/a(nr,nr) ) > sqrt(big1) ) ! if there is no violent scaling, artificial perturbation is not needed. ! phase 3: if ( .not. ( rsvec .or. lsvec ) ) then ! singular values only ! .. transpose a(1:nr,1:n) do p = 1, min( n-1, nr ) call stdlib${ii}$_${ci}$copy( n-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( n-p+1, a(p,p), 1_${ik}$ ) end do if ( nr == n ) a(n,n) = conjg(a(n,n)) ! the following two do-loops introduce small relative perturbation ! into the strict upper triangle of the lower triangular matrix. ! small entries below the main diagonal are also changed. ! this modification is useful if the computing environment does not ! provide/allow flush to zero underflow, for it prevents many ! annoying denormalized numbers in case of strongly scaled matrices. ! the perturbation is structured so that it does not introduce any ! new perturbation of the singular values, and it does not destroy ! the job done by the preconditioner. ! the licence for this perturbation is in the variable l2pert, which ! should be .false. if flush to zero underflow is active. if ( .not. almort ) then if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=${ck}$) do q = 1, nr ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=${ck}$) do p = 1, n if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & ctemp ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero,czero, a(1_${ik}$,2_${ik}$),lda ) end if ! Second Preconditioning Using The Qr Factorization call stdlib${ii}$_${ci}$geqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr ) ! And Transpose Upper To Lower Triangular do p = 1, nr - 1 call stdlib${ii}$_${ci}$copy( nr-p, a(p,p+1), lda, a(p+1,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( nr-p+1, a(p,p), 1_${ik}$ ) end do end if ! row-cyclic jacobi svd algorithm with column pivoting ! .. again some perturbation (a "background noise") is added ! to drown denormals if ( l2pert ) then ! xsc = sqrt(small) xsc = epsln / real(n,KIND=${ck}$) do q = 1, nr ctemp = cmplx(xsc*abs(a(q,q)),zero,KIND=${ck}$) do p = 1, nr if ( ( (p>q) .and. (abs(a(p,q))<=temp1) ).or. ( p < q ) )a(p,q) = & ctemp ! $ a(p,q) = temp1 * ( a(p,q) / abs(a(p,q)) ) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1, nr-1, czero, czero, a(1_${ik}$,2_${ik}$), lda ) end if ! .. and one-sided jacobi rotations are started on a lower ! triangular matrix (plus perturbation which is ignored in ! the part which destroys triangular form (confusing?!)) call stdlib${ii}$_${ci}$gesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,n, v, ldv, cwork, lwork, & rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) else if ( ( rsvec .and. ( .not. lsvec ) .and. ( .not. jracc ) ).or.( jracc .and. ( & .not. lsvec ) .and. ( nr /= n ) ) ) then ! -> singular values and right singular vectors <- if ( almort ) then ! In This Case Nr Equals N do p = 1, nr call stdlib${ii}$_${ci}$copy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( n-p+1, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_${ci}$gesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,cwork, lwork, & rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) else ! .. two more qr factorizations ( one qrf is not enough, two require ! accumulated product of jacobi rotations, three are perfect ) if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'L', nr-1,nr-1, czero, czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_${ci}$gelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr) call stdlib${ii}$_${ci}$lacpy( 'L', nr, nr, a, lda, v, ldv ) if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) call stdlib${ii}$_${ci}$geqrf( nr, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr call stdlib${ii}$_${ci}$copy( nr-p+1, v(p,p), ldv, v(p,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( nr-p+1, v(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset('U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv) call stdlib${ii}$_${ci}$gesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,ldu, cwork(n+1), & lwork-n, rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_${ci}$laset( 'A',n-nr, nr, czero,czero, v(nr+1,1_${ik}$), ldv ) call stdlib${ii}$_${ci}$laset( 'A',nr, n-nr, czero,czero, v(1_${ik}$,nr+1), ldv ) call stdlib${ii}$_${ci}$laset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_${ci}$unmlq( 'L', 'C', n, n, nr, a, lda, cwork,v, ldv, cwork(n+1), lwork-n, & ierr ) end if ! Permute The Rows Of V ! do 8991 p = 1, n ! call stdlib${ii}$_${ci}$copy( n, v(p,1), ldv, a(iwork(p),1), lda ) 8991 continue ! call stdlib${ii}$_${ci}$lacpy( 'all', n, n, a, lda, v, ldv ) call stdlib${ii}$_${ci}$lapmr( .false., n, n, v, ldv, iwork ) if ( transp ) then call stdlib${ii}$_${ci}$lacpy( 'A', n, n, v, ldv, u, ldu ) end if else if ( jracc .and. (.not. lsvec) .and. ( nr== n ) ) then if (n>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'L', n-1,n-1, czero, czero, a(2_${ik}$,1_${ik}$), lda ) call stdlib${ii}$_${ci}$gesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,cwork, lwork, rwork, & lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) call stdlib${ii}$_${ci}$lapmr( .false., n, n, v, ldv, iwork ) else if ( lsvec .and. ( .not. rsvec ) ) then ! Singular Values And Left Singular Vectors ! Second Preconditioning Step To Avoid Need To Accumulate ! jacobi rotations in the jacobi iterations. do p = 1, nr call stdlib${ii}$_${ci}$copy( n-p+1, a(p,p), lda, u(p,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( n-p+1, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_${ci}$geqrf( n, nr, u, ldu, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) do p = 1, nr - 1 call stdlib${ii}$_${ci}$copy( nr-p, u(p,p+1), ldu, u(p+1,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( n-p+1, u(p,p), 1_${ik}$ ) end do if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,lda, cwork(n+1), lwork-& n, rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < m ) then call stdlib${ii}$_${ci}$laset( 'A', m-nr, nr,czero, czero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset( 'A',nr, n1-nr, czero, czero, u(1_${ik}$,nr+1),ldu ) call stdlib${ii}$_${ci}$laset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu ) end if end if call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & ierr ) if ( rowpiv )call stdlib${ii}$_${ci}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) do p = 1, n1 xsc = one / stdlib${ii}$_${c2ri(ci)}$znrm2( m, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$dscal( m, xsc, u(1_${ik}$,p), 1_${ik}$ ) end do if ( transp ) then call stdlib${ii}$_${ci}$lacpy( 'A', n, n, u, ldu, v, ldv ) end if else ! Full Svd if ( .not. jracc ) then if ( .not. almort ) then ! second preconditioning step (qrf [with pivoting]) ! note that the composition of transpose, qrf and transpose is ! equivalent to an lqf call. since in many libraries the qrf ! seems to be better optimized than the lqf, we do explicit ! transpose and use the qrf. this is subject to changes in an ! optimized implementation of stdlib${ii}$_${ci}$gejsv. do p = 1, nr call stdlib${ii}$_${ci}$copy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( n-p+1, v(p,p), 1_${ik}$ ) end do ! The Following Two Loops Perturb Small Entries To Avoid ! denormals in the second qr factorization, where they are ! as good as zeros. this is done to avoid painfully slow ! computation with denormals. the relative size of the perturbation ! is a parameter that can be changed by the implementer. ! this perturbation device will be obsolete on machines with ! properly implemented arithmetic. ! to switch it off, set l2pert=.false. to remove it from the ! code, remove the action under l2pert=.true., leave the else part. ! the following two loops should be blocked and fused with the ! transposed copy above. if ( l2pert ) then xsc = sqrt(small) do q = 1, nr ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=${ck}$) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & ctemp ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) end if ! estimate the row scaled condition number of r1 ! (if r1 is rectangular, n > nr, then the condition number ! of the leading nr x nr submatrix is estimated.) call stdlib${ii}$_${ci}$lacpy( 'L', nr, nr, v, ldv, cwork(2_${ik}$*n+1), nr ) do p = 1, nr temp1 = stdlib${ii}$_${c2ri(ci)}$znrm2(nr-p+1,cwork(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) call stdlib${ii}$_${ci}$dscal(nr-p+1,one/temp1,cwork(2_${ik}$*n+(p-1)*nr+p),1_${ik}$) end do call stdlib${ii}$_${ci}$pocon('L',nr,cwork(2_${ik}$*n+1),nr,one,temp1,cwork(2_${ik}$*n+nr*nr+1),rwork,& ierr) condr1 = one / sqrt(temp1) ! Here Need A Second Opinion On The Condition Number ! Then Assume Worst Case Scenario ! r1 is ok for inverse <=> condr1 < real(n,KIND=${ck}$) ! more conservative <=> condr1 < sqrt(real(n,KIND=${ck}$)) cond_ok = sqrt(sqrt(real(nr,KIND=${ck}$))) ! [tp] cond_ok is a tuning parameter. if ( condr1 < cond_ok ) then ! .. the second qrf without pivoting. note: in an optimized ! implementation, this qrf should be implemented as the qrf ! of a lower triangular matrix. ! r1^* = q2 * r2 call stdlib${ii}$_${ci}$geqrf( n, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small)/epsln do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=${ck}$) if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) end do end do end if if ( nr /= n )call stdlib${ii}$_${ci}$lacpy( 'A', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) ! .. save ... ! This Transposed Copy Should Be Better Than Naive do p = 1, nr - 1 call stdlib${ii}$_${ci}$copy( nr-p, v(p,p+1), ldv, v(p+1,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv(nr-p+1, v(p,p), 1_${ik}$ ) end do v(nr,nr)=conjg(v(nr,nr)) condr2 = condr1 else ! .. ill-conditioned case: second qrf with pivoting ! note that windowed pivoting would be equally good ! numerically, and more run-time efficient. so, in ! an optimal implementation, the next call to stdlib${ii}$_${ci}$geqp3 ! should be replaced with eg. call zgeqpx (acm toms #782) ! with properly (carefully) chosen parameters. ! r1^* * p2 = q2 * r2 do p = 1, nr iwork(n+p) = 0_${ik}$ end do call stdlib${ii}$_${ci}$geqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),cwork(2_${ik}$*n+1), lwork-& 2_${ik}$*n, rwork, ierr ) ! * call stdlib${ii}$_${ci}$geqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1), ! * $ lwork-2*n, ierr ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=${ck}$) if ( abs(v(q,p)) <= temp1 )v(q,p) = ctemp ! $ v(q,p) = temp1 * ( v(q,p) / abs(v(q,p)) ) end do end do end if call stdlib${ii}$_${ci}$lacpy( 'A', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, nr do q = 1, p - 1 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),zero,KIND=${ck}$) ! v(p,q) = - temp1*( v(q,p) / abs(v(q,p)) ) v(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'L',nr-1,nr-1,czero,czero,v(2_${ik}$,1_${ik}$),ldv ) end if ! now, compute r2 = l3 * q3, the lq factorization. call stdlib${ii}$_${ci}$gelqf( nr, nr, v, ldv, cwork(2_${ik}$*n+n*nr+1),cwork(2_${ik}$*n+n*nr+nr+1), & lwork-2*n-n*nr-nr, ierr ) ! And Estimate The Condition Number call stdlib${ii}$_${ci}$lacpy( 'L',nr,nr,v,ldv,cwork(2_${ik}$*n+n*nr+nr+1),nr ) do p = 1, nr temp1 = stdlib${ii}$_${c2ri(ci)}$znrm2( p, cwork(2_${ik}$*n+n*nr+nr+p), nr ) call stdlib${ii}$_${ci}$dscal( p, one/temp1, cwork(2_${ik}$*n+n*nr+nr+p), nr ) end do call stdlib${ii}$_${ci}$pocon( 'L',nr,cwork(2_${ik}$*n+n*nr+nr+1),nr,one,temp1,cwork(2_${ik}$*n+n*nr+& nr+nr*nr+1),rwork,ierr ) condr2 = one / sqrt(temp1) if ( condr2 >= cond_ok ) then ! Save The Householder Vectors Used For Q3 ! (this overwrites the copy of r2, as it will not be ! needed in this branch, but it does not overwritte the ! huseholder vectors of q2.). call stdlib${ii}$_${ci}$lacpy( 'U', nr, nr, v, ldv, cwork(2_${ik}$*n+1), n ) ! And The Rest Of The Information On Q3 Is In ! work(2*n+n*nr+1:2*n+n*nr+n) end if end if if ( l2pert ) then xsc = sqrt(small) do q = 2, nr ctemp = xsc * v(q,q) do p = 1, q - 1 ! v(p,q) = - temp1*( v(p,q) / abs(v(p,q)) ) v(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1,nr-1, czero,czero, v(1_${ik}$,2_${ik}$), ldv ) end if ! second preconditioning finished; continue with jacobi svd ! the input matrix is lower trinagular. ! recover the right singular vectors as solution of a well ! conditioned triangular matrix equation. if ( condr1 < cond_ok ) then call stdlib${ii}$_${ci}$gesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,cwork(2_${ik}$*n+n*nr+nr+1)& ,lwork-2*n-n*nr-nr,rwork,lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_${ci}$copy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$dscal( nr, sva(p), v(1_${ik}$,p), 1_${ik}$ ) end do ! Pick The Right Matrix Equation And Solve It if ( nr == n ) then ! :)) .. best case, r1 is inverted. the solution of this matrix ! equation is q2*v2 = the product of the jacobi rotations ! used in stdlib${ii}$_${ci}$gesvj, premultiplied with the orthogonal matrix ! from the second qr factorization. call stdlib${ii}$_${ci}$trsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv) else ! .. r1 is well conditioned, but non-square. adjoint of r2 ! is inverted to get the product of the jacobi rotations ! used in stdlib${ii}$_${ci}$gesvj. the q-factor from the second qr ! factorization is then built in explicitly. call stdlib${ii}$_${ci}$trsm('L','U','C','N',nr,nr,cone,cwork(2_${ik}$*n+1),n,v,ldv) if ( nr < n ) then call stdlib${ii}$_${ci}$laset('A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv) call stdlib${ii}$_${ci}$laset('A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv) call stdlib${ii}$_${ci}$laset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_${ci}$unmqr('L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(& 2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr) end if else if ( condr2 < cond_ok ) then ! the matrix r2 is inverted. the solution of the matrix equation ! is q3^* * v3 = the product of the jacobi rotations (appplied to ! the lower triangular l3 from the lq factorization of ! r2=l3*q3), pre-multiplied with the transposed q3. call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, nr call stdlib${ii}$_${ci}$copy( nr, v(1_${ik}$,p), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$dscal( nr, sva(p), u(1_${ik}$,p), 1_${ik}$ ) end do call stdlib${ii}$_${ci}$trsm('L','U','N','N',nr,nr,cone,cwork(2_${ik}$*n+1),n,u,ldu) ! Apply The Permutation From The Second Qr Factorization do q = 1, nr do p = 1, nr cwork(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do end do if ( nr < n ) then call stdlib${ii}$_${ci}$laset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_${ci}$laset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_${ci}$laset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_${ci}$unmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) else ! last line of defense. ! #:( this is a rather pathological case: no scaled condition ! improvement after two pivoted qr factorizations. other ! possibility is that the rank revealing qr factorization ! or the condition estimator has failed, or the cond_ok ! is set very close to one (which is unnecessary). normally, ! this branch should never be executed, but in rare cases of ! failure of the rrqr or condition estimator, the last line of ! defense ensures that stdlib${ii}$_${ci}$gejsv completes the task. ! compute the full svd of l3 using stdlib${ii}$_${ci}$gesvj with explicit ! accumulation of jacobi rotations. call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,ldu, cwork(2_${ik}$*n+& n*nr+nr+1), lwork-2*n-n*nr-nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_${ci}$laset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_${ci}$laset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_${ci}$laset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv) end if call stdlib${ii}$_${ci}$unmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+& n*nr+nr+1),lwork-2*n-n*nr-nr,ierr ) call stdlib${ii}$_${ci}$unmlq( 'L', 'C', nr, nr, nr, cwork(2_${ik}$*n+1), n,cwork(2_${ik}$*n+n*nr+1), & u, ldu, cwork(2_${ik}$*n+n*nr+nr+1),lwork-2*n-n*nr-nr, ierr ) do q = 1, nr do p = 1, nr cwork(2_${ik}$*n+n*nr+nr+iwork(n+p)) = u(p,q) end do do p = 1, nr u(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do end do end if ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=${ck}$)) * epsln do q = 1, n do p = 1, n cwork(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_${c2ri(ci)}$znrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ci}$dscal( n, xsc,& v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_${ci}$laset('A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset('A',nr,n1-nr,czero,czero,u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ci}$laset('A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu) end if end if ! the q matrix from the first qrf is built into the left singular ! matrix u. this applies to all cases. call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& n, ierr ) ! the columns of u are normalized. the cost is o(m*n) flops. temp1 = sqrt(real(m,KIND=${ck}$)) * epsln do p = 1, nr xsc = one / stdlib${ii}$_${c2ri(ci)}$znrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ci}$dscal( m, xsc,& u(1_${ik}$,p), 1_${ik}$ ) end do ! if the initial qrf is computed with row pivoting, the left ! singular vectors must be adjusted. if ( rowpiv )call stdlib${ii}$_${ci}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) else ! The Initial Matrix A Has Almost Orthogonal Columns And ! the second qrf is not needed call stdlib${ii}$_${ci}$lacpy( 'U', n, n, a, lda, cwork(n+1), n ) if ( l2pert ) then xsc = sqrt(small) do p = 2, n ctemp = xsc * cwork( n + (p-1)*n + p ) do q = 1, p - 1 ! cwork(n+(q-1)*n+p)=-temp1 * ( cwork(n+(p-1)*n+q) / ! $ abs(cwork(n+(p-1)*n+q)) ) cwork(n+(q-1)*n+p)=-ctemp end do end do else call stdlib${ii}$_${ci}$laset( 'L',n-1,n-1,czero,czero,cwork(n+2),n ) end if call stdlib${ii}$_${ci}$gesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,n, u, ldu, cwork(n+& n*n+1), lwork-n-n*n, rwork, lrwork,info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) do p = 1, n call stdlib${ii}$_${ci}$copy( n, cwork(n+(p-1)*n+1), 1_${ik}$, u(1_${ik}$,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$dscal( n, sva(p), cwork(n+(p-1)*n+1), 1_${ik}$ ) end do call stdlib${ii}$_${ci}$trsm( 'L', 'U', 'N', 'N', n, n,cone, a, lda, cwork(n+1), n ) do p = 1, n call stdlib${ii}$_${ci}$copy( n, cwork(n+p), n, v(iwork(p),1_${ik}$), ldv ) end do temp1 = sqrt(real(n,KIND=${ck}$))*epsln do p = 1, n xsc = one / stdlib${ii}$_${c2ri(ci)}$znrm2( n, v(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ci}$dscal( n, xsc,& v(1_${ik}$,p), 1_${ik}$ ) end do ! assemble the left singular vector matrix u (m x n). if ( n < m ) then call stdlib${ii}$_${ci}$laset( 'A', m-n, n, czero, czero, u(n+1,1_${ik}$), ldu ) if ( n < n1 ) then call stdlib${ii}$_${ci}$laset('A',n, n1-n, czero, czero, u(1_${ik}$,n+1),ldu) call stdlib${ii}$_${ci}$laset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu) end if end if call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-& n, ierr ) temp1 = sqrt(real(m,KIND=${ck}$))*epsln do p = 1, n1 xsc = one / stdlib${ii}$_${c2ri(ci)}$znrm2( m, u(1_${ik}$,p), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ci}$dscal( m, xsc,& u(1_${ik}$,p), 1_${ik}$ ) end do if ( rowpiv )call stdlib${ii}$_${ci}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) end if ! end of the >> almost orthogonal case << in the full svd else ! this branch deploys a preconditioned jacobi svd with explicitly ! accumulated rotations. it is included as optional, mainly for ! experimental purposes. it does perform well, and can also be used. ! in this implementation, this branch will be automatically activated ! if the condition number sigma_max(a) / sigma_min(a) is predicted ! to be greater than the overflow threshold. this is because the ! a posteriori computation of the singular vectors assumes robust ! implementation of blas and some lapack procedures, capable of working ! in presence of extreme values, e.g. when the singular values spread from ! the underflow to the overflow threshold. do p = 1, nr call stdlib${ii}$_${ci}$copy( n-p+1, a(p,p), lda, v(p,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( n-p+1, v(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 1, nr ctemp = cmplx(xsc*abs( v(q,q) ),zero,KIND=${ck}$) do p = 1, n if ( ( p > q ) .and. ( abs(v(p,q)) <= temp1 ).or. ( p < q ) )v(p,q) = & ctemp ! $ v(p,q) = temp1 * ( v(p,q) / abs(v(p,q)) ) if ( p < q ) v(p,q) = - v(p,q) end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset( 'U', nr-1, nr-1, czero, czero, v(1_${ik}$,2_${ik}$), ldv ) end if call stdlib${ii}$_${ci}$geqrf( n, nr, v, ldv, cwork(n+1), cwork(2_${ik}$*n+1),lwork-2*n, ierr ) call stdlib${ii}$_${ci}$lacpy( 'L', n, nr, v, ldv, cwork(2_${ik}$*n+1), n ) do p = 1, nr call stdlib${ii}$_${ci}$copy( nr-p+1, v(p,p), ldv, u(p,p), 1_${ik}$ ) call stdlib${ii}$_${ci}$lacgv( nr-p+1, u(p,p), 1_${ik}$ ) end do if ( l2pert ) then xsc = sqrt(small/epsln) do q = 2, nr do p = 1, q - 1 ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),zero,KIND=${ck}$) ! u(p,q) = - temp1 * ( u(q,p) / abs(u(q,p)) ) u(p,q) = - ctemp end do end do else if (nr>1_${ik}$) call stdlib${ii}$_${ci}$laset('U', nr-1, nr-1, czero, czero, u(1_${ik}$,2_${ik}$), ldu ) end if call stdlib${ii}$_${ci}$gesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,n, v, ldv, cwork(2_${ik}$*n+n*nr+1),& lwork-2*n-n*nr,rwork, lrwork, info ) scalem = rwork(1_${ik}$) numrank = nint(rwork(2_${ik}$),KIND=${ik}$) if ( nr < n ) then call stdlib${ii}$_${ci}$laset( 'A',n-nr,nr,czero,czero,v(nr+1,1_${ik}$),ldv ) call stdlib${ii}$_${ci}$laset( 'A',nr,n-nr,czero,czero,v(1_${ik}$,nr+1),ldv ) call stdlib${ii}$_${ci}$laset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv ) end if call stdlib${ii}$_${ci}$unmqr( 'L','N',n,n,nr,cwork(2_${ik}$*n+1),n,cwork(n+1),v,ldv,cwork(2_${ik}$*n+n*nr+& nr+1),lwork-2*n-n*nr-nr,ierr ) ! permute the rows of v using the (column) permutation from the ! first qrf. also, scale the columns to make them unit in ! euclidean norm. this applies to all cases. temp1 = sqrt(real(n,KIND=${ck}$)) * epsln do q = 1, n do p = 1, n cwork(2_${ik}$*n+n*nr+nr+iwork(p)) = v(p,q) end do do p = 1, n v(p,q) = cwork(2_${ik}$*n+n*nr+nr+p) end do xsc = one / stdlib${ii}$_${c2ri(ci)}$znrm2( n, v(1_${ik}$,q), 1_${ik}$ ) if ( (xsc < (one-temp1)) .or. (xsc > (one+temp1)) )call stdlib${ii}$_${ci}$dscal( n, xsc,& v(1_${ik}$,q), 1_${ik}$ ) end do ! at this moment, v contains the right singular vectors of a. ! next, assemble the left singular vector matrix u (m x n). if ( nr < m ) then call stdlib${ii}$_${ci}$laset( 'A', m-nr, nr, czero, czero, u(nr+1,1_${ik}$), ldu ) if ( nr < n1 ) then call stdlib${ii}$_${ci}$laset('A',nr, n1-nr, czero, czero, u(1_${ik}$,nr+1),ldu) call stdlib${ii}$_${ci}$laset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu) end if end if call stdlib${ii}$_${ci}$unmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,ldu, cwork(n+1), lwork-n, & ierr ) if ( rowpiv )call stdlib${ii}$_${ci}$laswp( n1, u, ldu, 1_${ik}$, m-1, iwork(iwoff+1), -1_${ik}$ ) end if if ( transp ) then ! .. swap u and v because the procedure worked on a^* do p = 1, n call stdlib${ii}$_${ci}$swap( n, u(1_${ik}$,p), 1_${ik}$, v(1_${ik}$,p), 1_${ik}$ ) end do end if end if ! end of the full svd ! undo scaling, if necessary (and possible) if ( uscal2 <= (big/sva(1_${ik}$))*uscal1 ) then call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, uscal1, uscal2, nr, 1_${ik}$, sva, n, ierr ) uscal1 = one uscal2 = one end if if ( nr < n ) then do p = nr+1, n sva(p) = zero end do end if rwork(1_${ik}$) = uscal2 * scalem rwork(2_${ik}$) = uscal1 if ( errest ) rwork(3_${ik}$) = sconda if ( lsvec .and. rsvec ) then rwork(4_${ik}$) = condr1 rwork(5_${ik}$) = condr2 end if if ( l2tran ) then rwork(6_${ik}$) = entra rwork(7_${ik}$) = entrat end if iwork(1_${ik}$) = nr iwork(2_${ik}$) = numrank iwork(3_${ik}$) = warning if ( transp ) then iwork(4_${ik}$) = 1_${ik}$ else iwork(4_${ik}$) = -1_${ik}$ end if return end subroutine stdlib${ii}$_${ci}$gejsv #:endif #:endfor pure module subroutine stdlib${ii}$_sgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, lwork, & !! SGESVJ computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N orthogonal matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. !! SGESVJ can sometimes compute tiny singular values and their singular vectors much !! more accurately than other SVD routines, see below under Further Details. info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n character, intent(in) :: joba, jobu, jobv ! Array Arguments real(sp), intent(inout) :: a(lda,*), v(ldv,*), work(lwork) real(sp), intent(out) :: sva(n) ! ===================================================================== ! Local Parameters integer(${ik}$), parameter :: nsweep = 30_${ik}$ ! Local Scalars real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & large, mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, & temp1, theta, thsign, tol integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, goscale, lower, lsvec, noscale, rotok, rsvec, uctol, & upper ! Local Arrays real(sp) :: fastr(5_${ik}$) ! Intrinsic Functions ! from lapack ! from lapack ! Executable Statements ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) uctol = stdlib_lsame( jobu, 'C' ) rsvec = stdlib_lsame( jobv, 'V' ) applv = stdlib_lsame( jobv, 'A' ) upper = stdlib_lsame( joba, 'U' ) lower = stdlib_lsame( joba, 'L' ) if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then info = -1_${ik}$ else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -5_${ik}$ else if( lda<m ) then info = -7_${ik}$ else if( mv<0_${ik}$ ) then info = -9_${ik}$ else if( ( rsvec .and. ( ldv<n ) ) .or.( applv .and. ( ldv<mv ) ) ) then info = -11_${ik}$ else if( uctol .and. ( work( 1_${ik}$ )<=one ) ) then info = -12_${ik}$ else if( lwork<max( m+n, 6_${ik}$ ) ) then info = -13_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGESVJ', -info ) return end if ! #:) quick return for void matrix if( ( m==0 ) .or. ( n==0 ) )return ! set numerical parameters ! the stopping criterion for jacobi rotations is ! max_{i<>j}|a(:,i)^t * a(:,j)|/(||a(:,i)||*||a(:,j)||) < ctol*eps ! where eps is the round-off and ctol is defined as follows: if( uctol ) then ! ... user controlled ctol = work( 1_${ik}$ ) else ! ... default if( lsvec .or. rsvec .or. applv ) then ctol = sqrt( real( m,KIND=sp) ) else ctol = real( m,KIND=sp) end if end if ! ... and the machine dependent parameters are ! [!] (make sure that stdlib${ii}$_slamch() works properly on the target machine.) epsln = stdlib${ii}$_slamch( 'EPSILON' ) rooteps = sqrt( epsln ) sfmin = stdlib${ii}$_slamch( 'SAFEMINIMUM' ) rootsfmin = sqrt( sfmin ) small = sfmin / epsln big = stdlib${ii}$_slamch( 'OVERFLOW' ) ! big = one / sfmin rootbig = one / rootsfmin large = big / sqrt( real( m*n,KIND=sp) ) bigtheta = one / rooteps tol = ctol*epsln roottol = sqrt( tol ) if( real( m,KIND=sp)*epsln>=one ) then info = -4_${ik}$ call stdlib${ii}$_xerbla( 'SGESVJ', -info ) return end if ! initialize the right singular vector matrix. if( rsvec ) then mvl = n call stdlib${ii}$_slaset( 'A', mvl, n, zero, one, v, ldv ) else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) ! (!) if necessary, scale a to protect the largest singular value ! from overflow. it is possible that saving the largest singular ! value destroys the information about the small ones. ! this initial scaling is almost minimal in the sense that the ! goal is to make sure that no column norm overflows, and that ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries ! in a are detected, the procedure returns with info=-6. skl = one / sqrt( real( m,KIND=sp)*real( n,KIND=sp) ) noscale = .true. goscale = .true. if( lower ) then ! the input matrix is m-by-n lower triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_slassq( m-p+1, a( p, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'SGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else if( upper ) then ! the input matrix is m-by-n upper triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_slassq( p, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'SGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else ! the input matrix is m-by-n general dense do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'SGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do end if if( noscale )skl = one ! move the smaller part of the spectrum from the underflow threshold ! (!) start by determining the position of the nonzero entries of the ! array sva() relative to ( sfmin, big ). aapp = zero aaqq = big do p = 1, n if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) aapp = max( aapp, sva( p ) ) end do ! #:) quick return for zero matrix if( aapp==zero ) then if( lsvec )call stdlib${ii}$_slaset( 'G', m, n, zero, one, a, lda ) work( 1_${ik}$ ) = one work( 2_${ik}$ ) = zero work( 3_${ik}$ ) = zero work( 4_${ik}$ ) = zero work( 5_${ik}$ ) = zero work( 6_${ik}$ ) = zero return end if ! #:) quick return for one-column matrix if( n==1_${ik}$ ) then if( lsvec )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, sva( 1_${ik}$ ), skl, m, 1_${ik}$,a( 1_${ik}$, 1_${ik}$ ), lda, ierr ) work( 1_${ik}$ ) = one / skl if( sva( 1_${ik}$ )>=sfmin ) then work( 2_${ik}$ ) = one else work( 2_${ik}$ ) = zero end if work( 3_${ik}$ ) = zero work( 4_${ik}$ ) = zero work( 5_${ik}$ ) = zero work( 6_${ik}$ ) = zero return end if ! protect small singular values from underflow, and try to ! avoid underflows/overflows in computing jacobi rotations. sn = sqrt( sfmin / epsln ) temp1 = sqrt( big / real( n,KIND=sp) ) if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & then temp1 = min( big, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=sp) ) ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then temp1 = max( sn / aaqq, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=sp) )*aapp ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else temp1 = one end if ! scale, if necessary if( temp1/=one ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, temp1, n, 1_${ik}$, sva, n, ierr ) end if skl = temp1*skl if( skl/=one ) then call stdlib${ii}$_slascl( joba, 0_${ik}$, 0_${ik}$, one, skl, m, n, a, lda, ierr ) skl = one / skl end if ! row-cyclic jacobi svd algorithm with column pivoting emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! a is represented in factored form a = a * diag(work), where diag(work) ! is initialized to identity. work is updated during fast scaled ! rotations. do q = 1, n work( q ) = one end do swband = 3_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_sgesvj. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. if( ( lower .or. upper ) .and. ( n>max( 64_${ik}$, 4_${ik}$*kbl ) ) ) then ! [tp] the number of partition levels and the actual partition are ! tuning parameters. n4 = n / 4_${ik}$ n2 = n / 2_${ik}$ n34 = 3_${ik}$*n4 if( applv ) then q = 0_${ik}$ else q = 1_${ik}$ end if if( lower ) then ! this works very well on lower triangular matrices, in particular ! in the framework of the preconditioned jacobi svd (xgejsv). ! the idea is simple: ! [+ 0 0 0] note that jacobi transformations of [0 0] ! [+ + 0 0] [0 0] ! [+ + x 0] actually work on [x 0] [x 0] ! [+ + x x] [x x]. [x x] call stdlib${ii}$_sgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,work( n34+1 ), & sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2_${ik}$, work( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_sgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,work( n2+1 ), sva( & n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_sgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,work( n2+1 ), sva(& n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_sgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,work( n4+1 ), sva( & n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_sgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & tol, 1_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_sgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, sfmin,& tol, 1_${ik}$, work( n+1 ),lwork-n, ierr ) else if( upper ) then call stdlib${ii}$_sgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & tol, 2_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_sgsvj0( jobv, n2, n4, a( 1_${ik}$, n4+1 ), lda, work( n4+1 ),sva( n4+1 ), & mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_sgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, work( n+1 ),lwork-n, ierr ) call stdlib${ii}$_sgsvj0( jobv, n2+n4, n4, a( 1_${ik}$, n2+1 ), lda,work( n2+1 ), sva( n2+1 ),& mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, ierr ) end if end if ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = work( p ) work( p ) = work( q ) work( q ) = temp1 end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! unfortunately, some blas implementations compute stdlib${ii}$_snrm2(m,a(1,p),1) ! as sqrt(stdlib${ii}$_sdot(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_snrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_snrm2 is available, the if-then-else ! below should read "aapp = stdlib${ii}$_snrm2( m, a(1,p), 1 ) * work(p)". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else temp1 = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp )*work( p ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp,work( p ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_sdot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )*work( & q ) / aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,work( q ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_sdot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )*work( & p ) / aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq ) / aapq if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*work( p ) / work( q ) fastr( 4_${ik}$ ) = -t*work( q ) /work( p ) call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = work( p ) / work( q ) aqoap = work( q ) / work( p ) if( work( p )>=one ) then if( work( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap work( p ) = work( p )*cs work( q ) = work( q )*cs call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if end if else if( work( q )>=one ) then call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if else if( work( p )>=work( q ) )then call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work( n+1 ), & lda,ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) temp1 = -aapq*work( p ) / work( q ) call stdlib${ii}$_saxpy( m, temp1, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*work( q ) else t = zero aaqq = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*work( q ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*work( p ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp,work( p ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_sdot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )*work( & q ) / aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,work( q ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_sdot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )*work( & p ) / aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq ) / aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*work( p ) / work( q ) fastr( 4_${ik}$ ) = -t*work( q ) /work( p ) call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = work( p ) / work( q ) aqoap = work( q ) / work( p ) if( work( p )>=one ) then if( work( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap work( p ) = work( p )*cs work( q ) = work( q )*cs call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if work( p ) = work( p )*cs work( q ) = work( q ) / cs end if else if( work( q )>=one ) then call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if work( p ) = work( p ) / cs work( q ) = work( q )*cs else if( work( p )>=work( q ) )then call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work( n+1 & ), lda,ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*work( p ) / work( q ) call stdlib${ii}$_saxpy( m, temp1, work( n+1 ),1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ & ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work( n+1 & ), lda,ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*work( q ) / work( p ) call stdlib${ii}$_saxpy( m, temp1, work( n+1 ),1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*work( q ) else t = zero aaqq = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*work( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*work( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*work( n ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*work( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=sp) )*tol ) .and. ( real( n,& KIND=sp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the singular values and find how many are above ! the underflow threshold. n2 = 0_${ik}$ n4 = 0_${ik}$ do p = 1, n - 1 q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = work( p ) work( p ) = work( q ) work( q ) = temp1 call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if if( sva( p )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( p )*skl>sfmin )n2 = n2 + 1_${ik}$ end if end do if( sva( n )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( n )*skl>sfmin )n2 = n2 + 1_${ik}$ end if ! normalize the left singular vectors. if( lsvec .or. uctol ) then do p = 1, n2 call stdlib${ii}$_sscal( m, work( p ) / sva( p ), a( 1_${ik}$, p ), 1_${ik}$ ) end do end if ! scale the product of jacobi rotations (assemble the fast rotations). if( rsvec ) then if( applv ) then do p = 1, n call stdlib${ii}$_sscal( mvl, work( p ), v( 1_${ik}$, p ), 1_${ik}$ ) end do else do p = 1, n temp1 = one / stdlib${ii}$_snrm2( mvl, v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_sscal( mvl, temp1, v( 1_${ik}$, p ), 1_${ik}$ ) end do end if end if ! undo scaling, if necessary (and possible). if( ( ( skl>one ) .and. ( sva( 1_${ik}$ )<( big / skl ) ) ).or. ( ( skl<one ) .and. ( sva( & max( n2, 1_${ik}$ ) ) >( sfmin / skl ) ) ) ) then do p = 1, n sva( p ) = skl*sva( p ) end do skl = one end if work( 1_${ik}$ ) = skl ! the singular values of a are skl*sva(1:n). if skl/=one ! then some of the singular values may overflow or underflow and ! the spectrum is given in this factored representation. work( 2_${ik}$ ) = real( n4,KIND=sp) ! n4 is the number of computed nonzero singular values of a. work( 3_${ik}$ ) = real( n2,KIND=sp) ! n2 is the number of singular values of a greater than sfmin. ! if n2<n, sva(n2:n) contains zeros and/or denormalized numbers ! that may carry some information. work( 4_${ik}$ ) = real( i,KIND=sp) ! i is the index of the last sweep before declaring convergence. work( 5_${ik}$ ) = mxaapq ! mxaapq is the largest absolute value of scaled pivots in the ! last sweep work( 6_${ik}$ ) = mxsinj ! mxsinj is the largest absolute value of the sines of jacobi angles ! in the last sweep return end subroutine stdlib${ii}$_sgesvj pure module subroutine stdlib${ii}$_dgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, lwork, & !! DGESVJ computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N orthogonal matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. !! DGESVJ can sometimes compute tiny singular values and their singular vectors much !! more accurately than other SVD routines, see below under Further Details. info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n character, intent(in) :: joba, jobu, jobv ! Array Arguments real(dp), intent(inout) :: a(lda,*), v(ldv,*), work(lwork) real(dp), intent(out) :: sva(n) ! ===================================================================== ! Local Parameters integer(${ik}$), parameter :: nsweep = 30_${ik}$ ! Local Scalars real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & large, mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, & temp1, theta, thsign, tol integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, goscale, lower, lsvec, noscale, rotok, rsvec, uctol, & upper ! Local Arrays real(dp) :: fastr(5_${ik}$) ! Intrinsic Functions ! from lapack ! from lapack ! Executable Statements ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) uctol = stdlib_lsame( jobu, 'C' ) rsvec = stdlib_lsame( jobv, 'V' ) applv = stdlib_lsame( jobv, 'A' ) upper = stdlib_lsame( joba, 'U' ) lower = stdlib_lsame( joba, 'L' ) if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then info = -1_${ik}$ else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -5_${ik}$ else if( lda<m ) then info = -7_${ik}$ else if( mv<0_${ik}$ ) then info = -9_${ik}$ else if( ( rsvec .and. ( ldv<n ) ) .or.( applv .and. ( ldv<mv ) ) ) then info = -11_${ik}$ else if( uctol .and. ( work( 1_${ik}$ )<=one ) ) then info = -12_${ik}$ else if( lwork<max( m+n, 6_${ik}$ ) ) then info = -13_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if ! #:) quick return for void matrix if( ( m==0 ) .or. ( n==0 ) )return ! set numerical parameters ! the stopping criterion for jacobi rotations is ! max_{i<>j}|a(:,i)^t * a(:,j)|/(||a(:,i)||*||a(:,j)||) < ctol*eps ! where eps is the round-off and ctol is defined as follows: if( uctol ) then ! ... user controlled ctol = work( 1_${ik}$ ) else ! ... default if( lsvec .or. rsvec .or. applv ) then ctol = sqrt( real( m,KIND=dp) ) else ctol = real( m,KIND=dp) end if end if ! ... and the machine dependent parameters are ! [!] (make sure that stdlib${ii}$_dlamch() works properly on the target machine.) epsln = stdlib${ii}$_dlamch( 'EPSILON' ) rooteps = sqrt( epsln ) sfmin = stdlib${ii}$_dlamch( 'SAFEMINIMUM' ) rootsfmin = sqrt( sfmin ) small = sfmin / epsln big = stdlib${ii}$_dlamch( 'OVERFLOW' ) ! big = one / sfmin rootbig = one / rootsfmin large = big / sqrt( real( m*n,KIND=dp) ) bigtheta = one / rooteps tol = ctol*epsln roottol = sqrt( tol ) if( real( m,KIND=dp)*epsln>=one ) then info = -4_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if ! initialize the right singular vector matrix. if( rsvec ) then mvl = n call stdlib${ii}$_dlaset( 'A', mvl, n, zero, one, v, ldv ) else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) ! (!) if necessary, scale a to protect the largest singular value ! from overflow. it is possible that saving the largest singular ! value destroys the information about the small ones. ! this initial scaling is almost minimal in the sense that the ! goal is to make sure that no column norm overflows, and that ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries ! in a are detected, the procedure returns with info=-6. skl= one / sqrt( real( m,KIND=dp)*real( n,KIND=dp) ) noscale = .true. goscale = .true. if( lower ) then ! the input matrix is m-by-n lower triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_dlassq( m-p+1, a( p, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else if( upper ) then ! the input matrix is m-by-n upper triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_dlassq( p, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else ! the input matrix is m-by-n general dense do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do end if if( noscale )skl= one ! move the smaller part of the spectrum from the underflow threshold ! (!) start by determining the position of the nonzero entries of the ! array sva() relative to ( sfmin, big ). aapp = zero aaqq = big do p = 1, n if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) aapp = max( aapp, sva( p ) ) end do ! #:) quick return for zero matrix if( aapp==zero ) then if( lsvec )call stdlib${ii}$_dlaset( 'G', m, n, zero, one, a, lda ) work( 1_${ik}$ ) = one work( 2_${ik}$ ) = zero work( 3_${ik}$ ) = zero work( 4_${ik}$ ) = zero work( 5_${ik}$ ) = zero work( 6_${ik}$ ) = zero return end if ! #:) quick return for one-column matrix if( n==1_${ik}$ ) then if( lsvec )call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, sva( 1_${ik}$ ), skl, m, 1_${ik}$,a( 1_${ik}$, 1_${ik}$ ), lda, ierr ) work( 1_${ik}$ ) = one / skl if( sva( 1_${ik}$ )>=sfmin ) then work( 2_${ik}$ ) = one else work( 2_${ik}$ ) = zero end if work( 3_${ik}$ ) = zero work( 4_${ik}$ ) = zero work( 5_${ik}$ ) = zero work( 6_${ik}$ ) = zero return end if ! protect small singular values from underflow, and try to ! avoid underflows/overflows in computing jacobi rotations. sn = sqrt( sfmin / epsln ) temp1 = sqrt( big / real( n,KIND=dp) ) if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & then temp1 = min( big, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=dp) ) ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then temp1 = max( sn / aaqq, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=dp) )*aapp ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else temp1 = one end if ! scale, if necessary if( temp1/=one ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, temp1, n, 1_${ik}$, sva, n, ierr ) end if skl= temp1*skl if( skl/=one ) then call stdlib${ii}$_dlascl( joba, 0_${ik}$, 0_${ik}$, one, skl, m, n, a, lda, ierr ) skl= one / skl end if ! row-cyclic jacobi svd algorithm with column pivoting emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! a is represented in factored form a = a * diag(work), where diag(work) ! is initialized to identity. work is updated during fast scaled ! rotations. do q = 1, n work( q ) = one end do swband = 3_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_dgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_dgesvj. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. if( ( lower .or. upper ) .and. ( n>max( 64_${ik}$, 4_${ik}$*kbl ) ) ) then ! [tp] the number of partition levels and the actual partition are ! tuning parameters. n4 = n / 4_${ik}$ n2 = n / 2_${ik}$ n34 = 3_${ik}$*n4 if( applv ) then q = 0_${ik}$ else q = 1_${ik}$ end if if( lower ) then ! this works very well on lower triangular matrices, in particular ! in the framework of the preconditioned jacobi svd (xgejsv). ! the idea is simple: ! [+ 0 0 0] note that jacobi transformations of [0 0] ! [+ + 0 0] [0 0] ! [+ + x 0] actually work on [x 0] [x 0] ! [+ + x x] [x x]. [x x] call stdlib${ii}$_dgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,work( n34+1 ), & sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2_${ik}$, work( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_dgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,work( n2+1 ), sva( & n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_dgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,work( n2+1 ), sva(& n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_dgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,work( n4+1 ), sva( & n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_dgsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & tol, 1_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_dgsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, sfmin,& tol, 1_${ik}$, work( n+1 ),lwork-n, ierr ) else if( upper ) then call stdlib${ii}$_dgsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & tol, 2_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_dgsvj0( jobv, n2, n4, a( 1_${ik}$, n4+1 ), lda, work( n4+1 ),sva( n4+1 ), & mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_dgsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, work( n+1 ),lwork-n, ierr ) call stdlib${ii}$_dgsvj0( jobv, n2+n4, n4, a( 1_${ik}$, n2+1 ), lda,work( n2+1 ), sva( n2+1 ),& mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, ierr ) end if end if ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = work( p ) work( p ) = work( q ) work( q ) = temp1 end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! unfortunately, some blas implementations compute stdlib${ii}$_dnrm2(m,a(1,p),1) ! as sqrt(stdlib${ii}$_ddot(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_dnrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_dnrm2 is available, the if-then-else ! below should read "aapp = stdlib${ii}$_dnrm2( m, a(1,p), 1 ) * work(p)". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else temp1 = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp )*work( p ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,work( p ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_ddot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )*work( & q ) / aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,work( q ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_ddot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )*work( & p ) / aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs(aqoap-apoaq)/aapq if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*work( p ) / work( q ) fastr( 4_${ik}$ ) = -t*work( q ) /work( p ) call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = work( p ) / work( q ) aqoap = work( q ) / work( p ) if( work( p )>=one ) then if( work( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap work( p ) = work( p )*cs work( q ) = work( q )*cs call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if end if else if( work( q )>=one ) then call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if else if( work( p )>=work( q ) )then call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work( n+1 ), & lda,ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) temp1 = -aapq*work( p ) / work( q ) call stdlib${ii}$_daxpy( m, temp1, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*work( q ) else t = zero aaqq = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*work( q ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*work( p ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,work( p ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_ddot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )*work( & q ) / aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,work( q ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_ddot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )*work( & p ) / aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs(aqoap-apoaq)/aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*work( p ) / work( q ) fastr( 4_${ik}$ ) = -t*work( q ) /work( p ) call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = work( p ) / work( q ) aqoap = work( q ) / work( p ) if( work( p )>=one ) then if( work( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap work( p ) = work( p )*cs work( q ) = work( q )*cs call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if work( p ) = work( p )*cs work( q ) = work( q ) / cs end if else if( work( q )>=one ) then call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if work( p ) = work( p ) / cs work( q ) = work( q )*cs else if( work( p )>=work( q ) )then call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work( n+1 & ), lda,ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*work( p ) / work( q ) call stdlib${ii}$_daxpy( m, temp1, work( n+1 ),1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ & ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work( n+1 & ), lda,ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*work( q ) / work( p ) call stdlib${ii}$_daxpy( m, temp1, work( n+1 ),1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*work( q ) else t = zero aaqq = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*work( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*work( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*work( n ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*work( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=dp) )*tol ) .and. ( real( n,& KIND=dp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the singular values and find how many are above ! the underflow threshold. n2 = 0_${ik}$ n4 = 0_${ik}$ do p = 1, n - 1 q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = work( p ) work( p ) = work( q ) work( q ) = temp1 call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if if( sva( p )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( p )*skl>sfmin )n2 = n2 + 1_${ik}$ end if end do if( sva( n )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( n )*skl>sfmin )n2 = n2 + 1_${ik}$ end if ! normalize the left singular vectors. if( lsvec .or. uctol ) then do p = 1, n2 call stdlib${ii}$_dscal( m, work( p ) / sva( p ), a( 1_${ik}$, p ), 1_${ik}$ ) end do end if ! scale the product of jacobi rotations (assemble the fast rotations). if( rsvec ) then if( applv ) then do p = 1, n call stdlib${ii}$_dscal( mvl, work( p ), v( 1_${ik}$, p ), 1_${ik}$ ) end do else do p = 1, n temp1 = one / stdlib${ii}$_dnrm2( mvl, v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_dscal( mvl, temp1, v( 1_${ik}$, p ), 1_${ik}$ ) end do end if end if ! undo scaling, if necessary (and possible). if( ( ( skl>one ) .and. ( sva( 1_${ik}$ )<( big / skl) ) ).or. ( ( skl<one ) .and. ( sva( max(& n2, 1_${ik}$ ) ) >( sfmin / skl) ) ) ) then do p = 1, n sva( p ) = skl*sva( p ) end do skl= one end if work( 1_${ik}$ ) = skl ! the singular values of a are skl*sva(1:n). if skl/=one ! then some of the singular values may overflow or underflow and ! the spectrum is given in this factored representation. work( 2_${ik}$ ) = real( n4,KIND=dp) ! n4 is the number of computed nonzero singular values of a. work( 3_${ik}$ ) = real( n2,KIND=dp) ! n2 is the number of singular values of a greater than sfmin. ! if n2<n, sva(n2:n) contains zeros and/or denormalized numbers ! that may carry some information. work( 4_${ik}$ ) = real( i,KIND=dp) ! i is the index of the last sweep before declaring convergence. work( 5_${ik}$ ) = mxaapq ! mxaapq is the largest absolute value of scaled pivots in the ! last sweep work( 6_${ik}$ ) = mxsinj ! mxsinj is the largest absolute value of the sines of jacobi angles ! in the last sweep return end subroutine stdlib${ii}$_dgesvj #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, work, lwork, & !! DGESVJ: computes the singular value decomposition (SVD) of a real !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N orthogonal matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. !! DGESVJ can sometimes compute tiny singular values and their singular vectors much !! more accurately than other SVD routines, see below under Further Details. info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n character, intent(in) :: joba, jobu, jobv ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), v(ldv,*), work(lwork) real(${rk}$), intent(out) :: sva(n) ! ===================================================================== ! Local Parameters integer(${ik}$), parameter :: nsweep = 30_${ik}$ ! Local Scalars real(${rk}$) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & large, mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, & temp1, theta, thsign, tol integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, goscale, lower, lsvec, noscale, rotok, rsvec, uctol, & upper ! Local Arrays real(${rk}$) :: fastr(5_${ik}$) ! Intrinsic Functions ! from lapack ! from lapack ! Executable Statements ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) uctol = stdlib_lsame( jobu, 'C' ) rsvec = stdlib_lsame( jobv, 'V' ) applv = stdlib_lsame( jobv, 'A' ) upper = stdlib_lsame( joba, 'U' ) lower = stdlib_lsame( joba, 'L' ) if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then info = -1_${ik}$ else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -5_${ik}$ else if( lda<m ) then info = -7_${ik}$ else if( mv<0_${ik}$ ) then info = -9_${ik}$ else if( ( rsvec .and. ( ldv<n ) ) .or.( applv .and. ( ldv<mv ) ) ) then info = -11_${ik}$ else if( uctol .and. ( work( 1_${ik}$ )<=one ) ) then info = -12_${ik}$ else if( lwork<max( m+n, 6_${ik}$ ) ) then info = -13_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if ! #:) quick return for void matrix if( ( m==0 ) .or. ( n==0 ) )return ! set numerical parameters ! the stopping criterion for jacobi rotations is ! max_{i<>j}|a(:,i)^t * a(:,j)|/(||a(:,i)||*||a(:,j)||) < ctol*eps ! where eps is the round-off and ctol is defined as follows: if( uctol ) then ! ... user controlled ctol = work( 1_${ik}$ ) else ! ... default if( lsvec .or. rsvec .or. applv ) then ctol = sqrt( real( m,KIND=${rk}$) ) else ctol = real( m,KIND=${rk}$) end if end if ! ... and the machine dependent parameters are ! [!] (make sure that stdlib${ii}$_${ri}$lamch() works properly on the target machine.) epsln = stdlib${ii}$_${ri}$lamch( 'EPSILON' ) rooteps = sqrt( epsln ) sfmin = stdlib${ii}$_${ri}$lamch( 'SAFEMINIMUM' ) rootsfmin = sqrt( sfmin ) small = sfmin / epsln big = stdlib${ii}$_${ri}$lamch( 'OVERFLOW' ) ! big = one / sfmin rootbig = one / rootsfmin large = big / sqrt( real( m*n,KIND=${rk}$) ) bigtheta = one / rooteps tol = ctol*epsln roottol = sqrt( tol ) if( real( m,KIND=${rk}$)*epsln>=one ) then info = -4_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if ! initialize the right singular vector matrix. if( rsvec ) then mvl = n call stdlib${ii}$_${ri}$laset( 'A', mvl, n, zero, one, v, ldv ) else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) ! (!) if necessary, scale a to protect the largest singular value ! from overflow. it is possible that saving the largest singular ! value destroys the information about the small ones. ! this initial scaling is almost minimal in the sense that the ! goal is to make sure that no column norm overflows, and that ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries ! in a are detected, the procedure returns with info=-6. skl= one / sqrt( real( m,KIND=${rk}$)*real( n,KIND=${rk}$) ) noscale = .true. goscale = .true. if( lower ) then ! the input matrix is m-by-n lower triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m-p+1, a( p, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else if( upper ) then ! the input matrix is m-by-n upper triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ri}$lassq( p, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else ! the input matrix is m-by-n general dense do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'DGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do end if if( noscale )skl= one ! move the smaller part of the spectrum from the underflow threshold ! (!) start by determining the position of the nonzero entries of the ! array sva() relative to ( sfmin, big ). aapp = zero aaqq = big do p = 1, n if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) aapp = max( aapp, sva( p ) ) end do ! #:) quick return for zero matrix if( aapp==zero ) then if( lsvec )call stdlib${ii}$_${ri}$laset( 'G', m, n, zero, one, a, lda ) work( 1_${ik}$ ) = one work( 2_${ik}$ ) = zero work( 3_${ik}$ ) = zero work( 4_${ik}$ ) = zero work( 5_${ik}$ ) = zero work( 6_${ik}$ ) = zero return end if ! #:) quick return for one-column matrix if( n==1_${ik}$ ) then if( lsvec )call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, sva( 1_${ik}$ ), skl, m, 1_${ik}$,a( 1_${ik}$, 1_${ik}$ ), lda, ierr ) work( 1_${ik}$ ) = one / skl if( sva( 1_${ik}$ )>=sfmin ) then work( 2_${ik}$ ) = one else work( 2_${ik}$ ) = zero end if work( 3_${ik}$ ) = zero work( 4_${ik}$ ) = zero work( 5_${ik}$ ) = zero work( 6_${ik}$ ) = zero return end if ! protect small singular values from underflow, and try to ! avoid underflows/overflows in computing jacobi rotations. sn = sqrt( sfmin / epsln ) temp1 = sqrt( big / real( n,KIND=${rk}$) ) if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & then temp1 = min( big, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=${rk}$) ) ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then temp1 = max( sn / aaqq, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=${rk}$) )*aapp ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else temp1 = one end if ! scale, if necessary if( temp1/=one ) then call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, temp1, n, 1_${ik}$, sva, n, ierr ) end if skl= temp1*skl if( skl/=one ) then call stdlib${ii}$_${ri}$lascl( joba, 0_${ik}$, 0_${ik}$, one, skl, m, n, a, lda, ierr ) skl= one / skl end if ! row-cyclic jacobi svd algorithm with column pivoting emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! a is represented in factored form a = a * diag(work), where diag(work) ! is initialized to identity. work is updated during fast scaled ! rotations. do q = 1, n work( q ) = one end do swband = 3_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_${ri}$gesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_${ri}$gesvj. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. if( ( lower .or. upper ) .and. ( n>max( 64_${ik}$, 4_${ik}$*kbl ) ) ) then ! [tp] the number of partition levels and the actual partition are ! tuning parameters. n4 = n / 4_${ik}$ n2 = n / 2_${ik}$ n34 = 3_${ik}$*n4 if( applv ) then q = 0_${ik}$ else q = 1_${ik}$ end if if( lower ) then ! this works very well on lower triangular matrices, in particular ! in the framework of the preconditioned jacobi svd (xgejsv). ! the idea is simple: ! [+ 0 0 0] note that jacobi transformations of [0 0] ! [+ + 0 0] [0 0] ! [+ + x 0] actually work on [x 0] [x 0] ! [+ + x x] [x x]. [x x] call stdlib${ii}$_${ri}$gsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,work( n34+1 ), & sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2_${ik}$, work( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_${ri}$gsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,work( n2+1 ), sva( & n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_${ri}$gsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,work( n2+1 ), sva(& n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_${ri}$gsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,work( n4+1 ), sva( & n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_${ri}$gsvj0( jobv, m, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & tol, 1_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_${ri}$gsvj1( jobv, m, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, sfmin,& tol, 1_${ik}$, work( n+1 ),lwork-n, ierr ) else if( upper ) then call stdlib${ii}$_${ri}$gsvj0( jobv, n4, n4, a, lda, work, sva, mvl, v, ldv,epsln, sfmin, & tol, 2_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_${ri}$gsvj0( jobv, n2, n4, a( 1_${ik}$, n4+1 ), lda, work( n4+1 ),sva( n4+1 ), & mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1_${ik}$, work( n+1 ), lwork-n,ierr ) call stdlib${ii}$_${ri}$gsvj1( jobv, n2, n2, n4, a, lda, work, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, work( n+1 ),lwork-n, ierr ) call stdlib${ii}$_${ri}$gsvj0( jobv, n2+n4, n4, a( 1_${ik}$, n2+1 ), lda,work( n2+1 ), sva( n2+1 ),& mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,work( n+1 ), lwork-n, ierr ) end if end if ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = work( p ) work( p ) = work( q ) work( q ) = temp1 end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! unfortunately, some blas implementations compute stdlib${ii}$_${ri}$nrm2(m,a(1,p),1) ! as sqrt(stdlib${ii}$_${ri}$dot(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_${ri}$nrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_${ri}$nrm2 is available, the if-then-else ! below should read "aapp = stdlib${ii}$_${ri}$nrm2( m, a(1,p), 1 ) * work(p)". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else temp1 = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp )*work( p ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,work( p ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )*work( & q ) / aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,work( q ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )*work( & p ) / aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs(aqoap-apoaq)/aapq if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*work( p ) / work( q ) fastr( 4_${ik}$ ) = -t*work( q ) /work( p ) call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = work( p ) / work( q ) aqoap = work( q ) / work( p ) if( work( p )>=one ) then if( work( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap work( p ) = work( p )*cs work( q ) = work( q )*cs call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if end if else if( work( q )>=one ) then call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if else if( work( p )>=work( q ) )then call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work( n+1 ), & lda,ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) temp1 = -aapq*work( p ) / work( q ) call stdlib${ii}$_${ri}$axpy( m, temp1, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*work( q ) else t = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*work( q ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*work( p ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,work( p ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ )*work( & q ) / aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*work( & p )*work( q ) /aaqq ) / aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,work( q ), m, 1_${ik}$,work( n+& 1_${ik}$ ), lda, ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work( n+1 ), 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ )*work( & p ) / aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs(aqoap-apoaq)/aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*work( p ) / work( q ) fastr( 4_${ik}$ ) = -t*work( q ) /work( p ) call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = work( p ) / work( q ) aqoap = work( q ) / work( p ) if( work( p )>=one ) then if( work( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap work( p ) = work( p )*cs work( q ) = work( q )*cs call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if work( p ) = work( p )*cs work( q ) = work( q ) / cs end if else if( work( q )>=one ) then call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if work( p ) = work( p ) / cs work( q ) = work( q )*cs else if( work( p )>=work( q ) )then call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) work( p ) = work( p )*cs work( q ) = work( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) work( p ) = work( p ) / cs work( q ) = work( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work( n+1 & ), lda,ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*work( p ) / work( q ) call stdlib${ii}$_${ri}$axpy( m, temp1, work( n+1 ),1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ & ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work( n+1 & ), lda,ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*work( q ) / work( p ) call stdlib${ii}$_${ri}$axpy( m, temp1, work( n+1 ),1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*work( q ) else t = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*work( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*work( p ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*work( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*work( n ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*work( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=${rk}$) )*tol ) .and. ( real( n,& KIND=${rk}$)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the singular values and find how many are above ! the underflow threshold. n2 = 0_${ik}$ n4 = 0_${ik}$ do p = 1, n - 1 q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = work( p ) work( p ) = work( q ) work( q ) = temp1 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if if( sva( p )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( p )*skl>sfmin )n2 = n2 + 1_${ik}$ end if end do if( sva( n )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( n )*skl>sfmin )n2 = n2 + 1_${ik}$ end if ! normalize the left singular vectors. if( lsvec .or. uctol ) then do p = 1, n2 call stdlib${ii}$_${ri}$scal( m, work( p ) / sva( p ), a( 1_${ik}$, p ), 1_${ik}$ ) end do end if ! scale the product of jacobi rotations (assemble the fast rotations). if( rsvec ) then if( applv ) then do p = 1, n call stdlib${ii}$_${ri}$scal( mvl, work( p ), v( 1_${ik}$, p ), 1_${ik}$ ) end do else do p = 1, n temp1 = one / stdlib${ii}$_${ri}$nrm2( mvl, v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$scal( mvl, temp1, v( 1_${ik}$, p ), 1_${ik}$ ) end do end if end if ! undo scaling, if necessary (and possible). if( ( ( skl>one ) .and. ( sva( 1_${ik}$ )<( big / skl) ) ).or. ( ( skl<one ) .and. ( sva( max(& n2, 1_${ik}$ ) ) >( sfmin / skl) ) ) ) then do p = 1, n sva( p ) = skl*sva( p ) end do skl= one end if work( 1_${ik}$ ) = skl ! the singular values of a are skl*sva(1:n). if skl/=one ! then some of the singular values may overflow or underflow and ! the spectrum is given in this factored representation. work( 2_${ik}$ ) = real( n4,KIND=${rk}$) ! n4 is the number of computed nonzero singular values of a. work( 3_${ik}$ ) = real( n2,KIND=${rk}$) ! n2 is the number of singular values of a greater than sfmin. ! if n2<n, sva(n2:n) contains zeros and/or denormalized numbers ! that may carry some information. work( 4_${ik}$ ) = real( i,KIND=${rk}$) ! i is the index of the last sweep before declaring convergence. work( 5_${ik}$ ) = mxaapq ! mxaapq is the largest absolute value of scaled pivots in the ! last sweep work( 6_${ik}$ ) = mxsinj ! mxsinj is the largest absolute value of the sines of jacobi angles ! in the last sweep return end subroutine stdlib${ii}$_${ri}$gesvj #:endif #:endfor pure module subroutine stdlib${ii}$_cgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, lwork, & !! CGESVJ computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N unitary matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. rwork, lrwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, lrwork, m, mv, n character, intent(in) :: joba, jobu, jobv ! Array Arguments complex(sp), intent(inout) :: a(lda,*), v(ldv,*), cwork(lwork) real(sp), intent(inout) :: rwork(lrwork) real(sp), intent(out) :: sva(n) ! ===================================================================== ! Local Parameters integer(${ik}$), parameter :: nsweep = 30_${ik}$ ! Local Scalars complex(sp) :: aapq, ompq real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, temp1, & theta, thsign, tol integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, goscale, lower, lquery, lsvec, noscale, rotok, rsvec, uctol, & upper ! Intrinsic Functions ! from lapack ! from lapack ! Executable Statements ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) uctol = stdlib_lsame( jobu, 'C' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. stdlib_lsame( jobv, 'J' ) applv = stdlib_lsame( jobv, 'A' ) upper = stdlib_lsame( joba, 'U' ) lower = stdlib_lsame( joba, 'L' ) lquery = ( lwork == -1_${ik}$ ) .or. ( lrwork == -1_${ik}$ ) if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then info = -1_${ik}$ else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -5_${ik}$ else if( lda<m ) then info = -7_${ik}$ else if( mv<0_${ik}$ ) then info = -9_${ik}$ else if( ( rsvec .and. ( ldv<n ) ) .or.( applv .and. ( ldv<mv ) ) ) then info = -11_${ik}$ else if( uctol .and. ( rwork( 1_${ik}$ )<=one ) ) then info = -12_${ik}$ else if( lwork<( m+n ) .and. ( .not.lquery ) ) then info = -13_${ik}$ else if( lrwork<max( n, 6_${ik}$ ) .and. ( .not.lquery ) ) then info = -15_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGESVJ', -info ) return else if ( lquery ) then cwork(1_${ik}$) = m + n rwork(1_${ik}$) = max( n, 6_${ik}$ ) return end if ! #:) quick return for void matrix if( ( m==0 ) .or. ( n==0 ) )return ! set numerical parameters ! the stopping criterion for jacobi rotations is ! max_{i<>j}|a(:,i)^* * a(:,j)| / (||a(:,i)||*||a(:,j)||) < ctol*eps ! where eps is the round-off and ctol is defined as follows: if( uctol ) then ! ... user controlled ctol = rwork( 1_${ik}$ ) else ! ... default if( lsvec .or. rsvec .or. applv ) then ctol = sqrt( real( m,KIND=sp) ) else ctol = real( m,KIND=sp) end if end if ! ... and the machine dependent parameters are ! [!] (make sure that stdlib${ii}$_slamch() works properly on the target machine.) epsln = stdlib${ii}$_slamch( 'EPSILON' ) rooteps = sqrt( epsln ) sfmin = stdlib${ii}$_slamch( 'SAFEMINIMUM' ) rootsfmin = sqrt( sfmin ) small = sfmin / epsln ! big = stdlib${ii}$_slamch( 'overflow' ) big = one / sfmin rootbig = one / rootsfmin ! large = big / sqrt( real( m*n,KIND=sp) ) bigtheta = one / rooteps tol = ctol*epsln roottol = sqrt( tol ) if( real( m,KIND=sp)*epsln>=one ) then info = -4_${ik}$ call stdlib${ii}$_xerbla( 'CGESVJ', -info ) return end if ! initialize the right singular vector matrix. if( rsvec ) then mvl = n call stdlib${ii}$_claset( 'A', mvl, n, czero, cone, v, ldv ) else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) ! (!) if necessary, scale a to protect the largest singular value ! from overflow. it is possible that saving the largest singular ! value destroys the information about the small ones. ! this initial scaling is almost minimal in the sense that the ! goal is to make sure that no column norm overflows, and that ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries ! in a are detected, the procedure returns with info=-6. skl = one / sqrt( real( m,KIND=sp)*real( n,KIND=sp) ) noscale = .true. goscale = .true. if( lower ) then ! the input matrix is m-by-n lower triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_classq( m-p+1, a( p, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'CGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else if( upper ) then ! the input matrix is m-by-n upper triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_classq( p, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'CGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else ! the input matrix is m-by-n general dense do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'CGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do end if if( noscale )skl = one ! move the smaller part of the spectrum from the underflow threshold ! (!) start by determining the position of the nonzero entries of the ! array sva() relative to ( sfmin, big ). aapp = zero aaqq = big do p = 1, n if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) aapp = max( aapp, sva( p ) ) end do ! #:) quick return for zero matrix if( aapp==zero ) then if( lsvec )call stdlib${ii}$_claset( 'G', m, n, czero, cone, a, lda ) rwork( 1_${ik}$ ) = one rwork( 2_${ik}$ ) = zero rwork( 3_${ik}$ ) = zero rwork( 4_${ik}$ ) = zero rwork( 5_${ik}$ ) = zero rwork( 6_${ik}$ ) = zero return end if ! #:) quick return for one-column matrix if( n==1_${ik}$ ) then if( lsvec )call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, sva( 1_${ik}$ ), skl, m, 1_${ik}$,a( 1_${ik}$, 1_${ik}$ ), lda, ierr ) rwork( 1_${ik}$ ) = one / skl if( sva( 1_${ik}$ )>=sfmin ) then rwork( 2_${ik}$ ) = one else rwork( 2_${ik}$ ) = zero end if rwork( 3_${ik}$ ) = zero rwork( 4_${ik}$ ) = zero rwork( 5_${ik}$ ) = zero rwork( 6_${ik}$ ) = zero return end if ! protect small singular values from underflow, and try to ! avoid underflows/overflows in computing jacobi rotations. sn = sqrt( sfmin / epsln ) temp1 = sqrt( big / real( n,KIND=sp) ) if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & then temp1 = min( big, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then temp1 = min( sn / aaqq, big / ( aapp*sqrt( real( n,KIND=sp) ) ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then temp1 = max( sn / aaqq, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=sp) )*aapp ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else temp1 = one end if ! scale, if necessary if( temp1/=one ) then call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, temp1, n, 1_${ik}$, sva, n, ierr ) end if skl = temp1*skl if( skl/=one ) then call stdlib${ii}$_clascl( joba, 0_${ik}$, 0_${ik}$, one, skl, m, n, a, lda, ierr ) skl = one / skl end if ! row-cyclic jacobi svd algorithm with column pivoting emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ do q = 1, n cwork( q ) = cone end do swband = 3_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_cgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_cgejsv. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. if( ( lower .or. upper ) .and. ( n>max( 64_${ik}$, 4_${ik}$*kbl ) ) ) then ! [tp] the number of partition levels and the actual partition are ! tuning parameters. n4 = n / 4_${ik}$ n2 = n / 2_${ik}$ n34 = 3_${ik}$*n4 if( applv ) then q = 0_${ik}$ else q = 1_${ik}$ end if if( lower ) then ! this works very well on lower triangular matrices, in particular ! in the framework of the preconditioned jacobi svd (xgejsv). ! the idea is simple: ! [+ 0 0 0] note that jacobi transformations of [0 0] ! [+ + 0 0] [0 0] ! [+ + x 0] actually work on [x 0] [x 0] ! [+ + x x] [x x]. [x x] call stdlib${ii}$_cgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,cwork( n34+1 ), & sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2_${ik}$, cwork( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_cgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,cwork( n2+1 ), sva( & n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2_${ik}$,cwork( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_cgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,cwork( n2+1 ), & sva( n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_cgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,cwork( n4+1 ), sva( & n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_cgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & tol, 1_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_cgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, cwork( n+1 ),lwork-n, ierr ) else if( upper ) then call stdlib${ii}$_cgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & tol, 2_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_cgsvj0( jobv, n2, n4, a( 1_${ik}$, n4+1 ), lda, cwork( n4+1 ),sva( n4+1 ), & mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_cgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, cwork( n+1 ),lwork-n, ierr ) call stdlib${ii}$_cgsvj0( jobv, n2+n4, n4, a( 1_${ik}$, n2+1 ), lda,cwork( n2+1 ), sva( n2+1 )& , mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), lwork-n, ierr ) end if end if ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = cwork(p) cwork(p) = cwork(q) cwork(q) = aapq end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! [!] caveat: ! unfortunately, some blas implementations compute stdlib${ii}$_scnrm2(m,a(1,p),1) ! as sqrt(s=stdlib${ii}$_cdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_scnrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_scnrm2 is available, the if-then-else-end if ! below should be replaced with "aapp = stdlib${ii}$_scnrm2( m, a(1,p), 1 )". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else temp1 = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_cdotc( m, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aapp ) / aaqq else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_cdotc( m, a(1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg( cwork(p) ) * cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq1 if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if ( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if ( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if cwork(p) = -cwork(q) * ompq else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, cwork(n+1), & lda,ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) call stdlib${ii}$_caxpy( m, -aapq, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ ) else t = zero aaqq = one call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_cdotc( m, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if cwork(p) = -cwork(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, cwork(n+1)& ,lda,ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_caxpy( m, -aapq, cwork(n+1),1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, cwork(n+1)& ,lda,ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_caxpy( m, -conjg(aapq),cwork(n+1), 1_${ik}$, a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=sp) )*tol ) .and. ( real( n,& KIND=sp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the singular values and find how many are above ! the underflow threshold. n2 = 0_${ik}$ n4 = 0_${ik}$ do p = 1, n - 1 q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if if( sva( p )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( p )*skl>sfmin )n2 = n2 + 1_${ik}$ end if end do if( sva( n )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( n )*skl>sfmin )n2 = n2 + 1_${ik}$ end if ! normalize the left singular vectors. if( lsvec .or. uctol ) then do p = 1, n4 ! call stdlib${ii}$_csscal( m, one / sva( p ), a( 1, p ), 1 ) call stdlib${ii}$_clascl( 'G',0_${ik}$,0_${ik}$, sva(p), one, m, 1_${ik}$, a(1_${ik}$,p), m, ierr ) end do end if ! scale the product of jacobi rotations. if( rsvec ) then do p = 1, n temp1 = one / stdlib${ii}$_scnrm2( mvl, v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_csscal( mvl, temp1, v( 1_${ik}$, p ), 1_${ik}$ ) end do end if ! undo scaling, if necessary (and possible). if( ( ( skl>one ) .and. ( sva( 1_${ik}$ )<( big / skl ) ) ).or. ( ( skl<one ) .and. ( sva( & max( n2, 1_${ik}$ ) ) >( sfmin / skl ) ) ) ) then do p = 1, n sva( p ) = skl*sva( p ) end do skl = one end if rwork( 1_${ik}$ ) = skl ! the singular values of a are skl*sva(1:n). if skl/=one ! then some of the singular values may overflow or underflow and ! the spectrum is given in this factored representation. rwork( 2_${ik}$ ) = real( n4,KIND=sp) ! n4 is the number of computed nonzero singular values of a. rwork( 3_${ik}$ ) = real( n2,KIND=sp) ! n2 is the number of singular values of a greater than sfmin. ! if n2<n, sva(n2:n) contains zeros and/or denormalized numbers ! that may carry some information. rwork( 4_${ik}$ ) = real( i,KIND=sp) ! i is the index of the last sweep before declaring convergence. rwork( 5_${ik}$ ) = mxaapq ! mxaapq is the largest absolute value of scaled pivots in the ! last sweep rwork( 6_${ik}$ ) = mxsinj ! mxsinj is the largest absolute value of the sines of jacobi angles ! in the last sweep return end subroutine stdlib${ii}$_cgesvj pure module subroutine stdlib${ii}$_zgesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, lwork, & !! ZGESVJ computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N unitary matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. rwork, lrwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, lrwork, m, mv, n character, intent(in) :: joba, jobu, jobv ! Array Arguments complex(dp), intent(inout) :: a(lda,*), v(ldv,*), cwork(lwork) real(dp), intent(inout) :: rwork(lrwork) real(dp), intent(out) :: sva(n) ! ===================================================================== ! Local Parameters integer(${ik}$), parameter :: nsweep = 30_${ik}$ ! Local Scalars complex(dp) :: aapq, ompq real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, temp1, & theta, thsign, tol integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, goscale, lower, lquery, lsvec, noscale, rotok, rsvec, uctol, & upper ! Intrinsic Functions ! from lapack ! from lapack ! Executable Statements ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) uctol = stdlib_lsame( jobu, 'C' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. stdlib_lsame( jobv, 'J' ) applv = stdlib_lsame( jobv, 'A' ) upper = stdlib_lsame( joba, 'U' ) lower = stdlib_lsame( joba, 'L' ) lquery = ( lwork == -1_${ik}$ ) .or. ( lrwork == -1_${ik}$ ) if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then info = -1_${ik}$ else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -5_${ik}$ else if( lda<m ) then info = -7_${ik}$ else if( mv<0_${ik}$ ) then info = -9_${ik}$ else if( ( rsvec .and. ( ldv<n ) ) .or.( applv .and. ( ldv<mv ) ) ) then info = -11_${ik}$ else if( uctol .and. ( rwork( 1_${ik}$ )<=one ) ) then info = -12_${ik}$ else if( ( lwork<( m+n ) ) .and. ( .not.lquery ) ) then info = -13_${ik}$ else if( ( lrwork<max( n, 6_${ik}$ ) ) .and. ( .not.lquery ) ) then info = -15_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return else if ( lquery ) then cwork(1_${ik}$) = m + n rwork(1_${ik}$) = max( n, 6_${ik}$ ) return end if ! #:) quick return for void matrix if( ( m==0 ) .or. ( n==0 ) )return ! set numerical parameters ! the stopping criterion for jacobi rotations is ! max_{i<>j}|a(:,i)^* * a(:,j)| / (||a(:,i)||*||a(:,j)||) < ctol*eps ! where eps is the round-off and ctol is defined as follows: if( uctol ) then ! ... user controlled ctol = rwork( 1_${ik}$ ) else ! ... default if( lsvec .or. rsvec .or. applv ) then ctol = sqrt( real( m,KIND=dp) ) else ctol = real( m,KIND=dp) end if end if ! ... and the machine dependent parameters are ! [!] (make sure that stdlib${ii}$_slamch() works properly on the target machine.) epsln = stdlib${ii}$_dlamch( 'EPSILON' ) rooteps = sqrt( epsln ) sfmin = stdlib${ii}$_dlamch( 'SAFEMINIMUM' ) rootsfmin = sqrt( sfmin ) small = sfmin / epsln big = stdlib${ii}$_dlamch( 'OVERFLOW' ) ! big = one / sfmin rootbig = one / rootsfmin ! large = big / sqrt( real( m*n,KIND=dp) ) bigtheta = one / rooteps tol = ctol*epsln roottol = sqrt( tol ) if( real( m,KIND=dp)*epsln>=one ) then info = -4_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if ! initialize the right singular vector matrix. if( rsvec ) then mvl = n call stdlib${ii}$_zlaset( 'A', mvl, n, czero, cone, v, ldv ) else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) ! (!) if necessary, scale a to protect the largest singular value ! from overflow. it is possible that saving the largest singular ! value destroys the information about the small ones. ! this initial scaling is almost minimal in the sense that the ! goal is to make sure that no column norm overflows, and that ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries ! in a are detected, the procedure returns with info=-6. skl = one / sqrt( real( m,KIND=dp)*real( n,KIND=dp) ) noscale = .true. goscale = .true. if( lower ) then ! the input matrix is m-by-n lower triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_zlassq( m-p+1, a( p, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else if( upper ) then ! the input matrix is m-by-n upper triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_zlassq( p, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else ! the input matrix is m-by-n general dense do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do end if if( noscale )skl = one ! move the smaller part of the spectrum from the underflow threshold ! (!) start by determining the position of the nonzero entries of the ! array sva() relative to ( sfmin, big ). aapp = zero aaqq = big do p = 1, n if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) aapp = max( aapp, sva( p ) ) end do ! #:) quick return for zero matrix if( aapp==zero ) then if( lsvec )call stdlib${ii}$_zlaset( 'G', m, n, czero, cone, a, lda ) rwork( 1_${ik}$ ) = one rwork( 2_${ik}$ ) = zero rwork( 3_${ik}$ ) = zero rwork( 4_${ik}$ ) = zero rwork( 5_${ik}$ ) = zero rwork( 6_${ik}$ ) = zero return end if ! #:) quick return for one-column matrix if( n==1_${ik}$ ) then if( lsvec )call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, sva( 1_${ik}$ ), skl, m, 1_${ik}$,a( 1_${ik}$, 1_${ik}$ ), lda, ierr ) rwork( 1_${ik}$ ) = one / skl if( sva( 1_${ik}$ )>=sfmin ) then rwork( 2_${ik}$ ) = one else rwork( 2_${ik}$ ) = zero end if rwork( 3_${ik}$ ) = zero rwork( 4_${ik}$ ) = zero rwork( 5_${ik}$ ) = zero rwork( 6_${ik}$ ) = zero return end if ! protect small singular values from underflow, and try to ! avoid underflows/overflows in computing jacobi rotations. sn = sqrt( sfmin / epsln ) temp1 = sqrt( big / real( n,KIND=dp) ) if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & then temp1 = min( big, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then temp1 = min( sn / aaqq, big / (aapp*sqrt( real(n,KIND=dp)) ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then temp1 = max( sn / aaqq, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=dp) )*aapp ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else temp1 = one end if ! scale, if necessary if( temp1/=one ) then call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, temp1, n, 1_${ik}$, sva, n, ierr ) end if skl = temp1*skl if( skl/=one ) then call stdlib${ii}$_zlascl( joba, 0_${ik}$, 0_${ik}$, one, skl, m, n, a, lda, ierr ) skl = one / skl end if ! row-cyclic jacobi svd algorithm with column pivoting emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ do q = 1, n cwork( q ) = cone end do swband = 3_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_zgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_zgejsv. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. if( ( lower .or. upper ) .and. ( n>max( 64_${ik}$, 4_${ik}$*kbl ) ) ) then ! [tp] the number of partition levels and the actual partition are ! tuning parameters. n4 = n / 4_${ik}$ n2 = n / 2_${ik}$ n34 = 3_${ik}$*n4 if( applv ) then q = 0_${ik}$ else q = 1_${ik}$ end if if( lower ) then ! this works very well on lower triangular matrices, in particular ! in the framework of the preconditioned jacobi svd (xgejsv). ! the idea is simple: ! [+ 0 0 0] note that jacobi transformations of [0 0] ! [+ + 0 0] [0 0] ! [+ + x 0] actually work on [x 0] [x 0] ! [+ + x x] [x x]. [x x] call stdlib${ii}$_zgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,cwork( n34+1 ), & sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2_${ik}$, cwork( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_zgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,cwork( n2+1 ), sva( & n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2_${ik}$,cwork( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_zgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,cwork( n2+1 ), & sva( n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_zgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,cwork( n4+1 ), sva( & n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_zgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & tol, 1_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_zgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, cwork( n+1 ),lwork-n, ierr ) else if( upper ) then call stdlib${ii}$_zgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & tol, 2_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_zgsvj0( jobv, n2, n4, a( 1_${ik}$, n4+1 ), lda, cwork( n4+1 ),sva( n4+1 ), & mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_zgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, cwork( n+1 ),lwork-n, ierr ) call stdlib${ii}$_zgsvj0( jobv, n2+n4, n4, a( 1_${ik}$, n2+1 ), lda,cwork( n2+1 ), sva( n2+1 )& , mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), lwork-n, ierr ) end if end if ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = cwork(p) cwork(p) = cwork(q) cwork(q) = aapq end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! [!] caveat: ! unfortunately, some blas implementations compute stdlib${ii}$_dznrm2(m,a(1,p),1) ! as sqrt(s=stdlib${ii}$_cdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_dznrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_scnrm2 is available, the if-then-else-end if ! below should be replaced with "aapp = stdlib${ii}$_dznrm2( m, a(1,p), 1 )". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else temp1 = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_zdotc( m, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aapp ) / aaqq else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_zdotc( m, a(1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg( cwork(p) ) * cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq1 if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if ( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if ( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if cwork(p) = -cwork(q) * ompq else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, cwork(n+1), & lda,ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) call stdlib${ii}$_zaxpy( m, -aapq, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$ ) else t = zero aaqq = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_zdotc( m, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if cwork(p) = -cwork(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, cwork(n+1)& ,lda,ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_zaxpy( m, -aapq, cwork(n+1),1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, cwork(n+1)& ,lda,ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_zaxpy( m, -conjg(aapq),cwork(n+1), 1_${ik}$, a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=dp) )*tol ) .and. ( real( n,& KIND=dp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the singular values and find how many are above ! the underflow threshold. n2 = 0_${ik}$ n4 = 0_${ik}$ do p = 1, n - 1 q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if if( sva( p )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( p )*skl>sfmin )n2 = n2 + 1_${ik}$ end if end do if( sva( n )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( n )*skl>sfmin )n2 = n2 + 1_${ik}$ end if ! normalize the left singular vectors. if( lsvec .or. uctol ) then do p = 1, n4 ! call stdlib${ii}$_zdscal( m, one / sva( p ), a( 1, p ), 1 ) call stdlib${ii}$_zlascl( 'G',0_${ik}$,0_${ik}$, sva(p), one, m, 1_${ik}$, a(1_${ik}$,p), m, ierr ) end do end if ! scale the product of jacobi rotations. if( rsvec ) then do p = 1, n temp1 = one / stdlib${ii}$_dznrm2( mvl, v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_zdscal( mvl, temp1, v( 1_${ik}$, p ), 1_${ik}$ ) end do end if ! undo scaling, if necessary (and possible). if( ( ( skl>one ) .and. ( sva( 1_${ik}$ )<( big / skl ) ) ).or. ( ( skl<one ) .and. ( sva( & max( n2, 1_${ik}$ ) ) >( sfmin / skl ) ) ) ) then do p = 1, n sva( p ) = skl*sva( p ) end do skl = one end if rwork( 1_${ik}$ ) = skl ! the singular values of a are skl*sva(1:n). if skl/=one ! then some of the singular values may overflow or underflow and ! the spectrum is given in this factored representation. rwork( 2_${ik}$ ) = real( n4,KIND=dp) ! n4 is the number of computed nonzero singular values of a. rwork( 3_${ik}$ ) = real( n2,KIND=dp) ! n2 is the number of singular values of a greater than sfmin. ! if n2<n, sva(n2:n) contains zeros and/or denormalized numbers ! that may carry some information. rwork( 4_${ik}$ ) = real( i,KIND=dp) ! i is the index of the last sweep before declaring convergence. rwork( 5_${ik}$ ) = mxaapq ! mxaapq is the largest absolute value of scaled pivots in the ! last sweep rwork( 6_${ik}$ ) = mxsinj ! mxsinj is the largest absolute value of the sines of jacobi angles ! in the last sweep return end subroutine stdlib${ii}$_zgesvj #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gesvj( joba, jobu, jobv, m, n, a, lda, sva, mv, v,ldv, cwork, lwork, & !! ZGESVJ: computes the singular value decomposition (SVD) of a complex !! M-by-N matrix A, where M >= N. The SVD of A is written as !! [++] [xx] [x0] [xx] !! A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] !! [++] [xx] !! where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal !! matrix, and V is an N-by-N unitary matrix. The diagonal elements !! of SIGMA are the singular values of A. The columns of U and V are the !! left and the right singular vectors of A, respectively. rwork, lrwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, lrwork, m, mv, n character, intent(in) :: joba, jobu, jobv ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*), v(ldv,*), cwork(lwork) real(${ck}$), intent(inout) :: rwork(lrwork) real(${ck}$), intent(out) :: sva(n) ! ===================================================================== ! Local Parameters integer(${ik}$), parameter :: nsweep = 30_${ik}$ ! Local Scalars complex(${ck}$) :: aapq, ompq real(${ck}$) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, ctol, epsln, & mxaapq, mxsinj, rootbig, rooteps, rootsfmin, roottol, skl, sfmin, small, sn, t, temp1, & theta, thsign, tol integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, n2, n34, n4, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, goscale, lower, lquery, lsvec, noscale, rotok, rsvec, uctol, & upper ! Intrinsic Functions ! from lapack ! from lapack ! Executable Statements ! test the input arguments lsvec = stdlib_lsame( jobu, 'U' ) .or. stdlib_lsame( jobu, 'F' ) uctol = stdlib_lsame( jobu, 'C' ) rsvec = stdlib_lsame( jobv, 'V' ) .or. stdlib_lsame( jobv, 'J' ) applv = stdlib_lsame( jobv, 'A' ) upper = stdlib_lsame( joba, 'U' ) lower = stdlib_lsame( joba, 'L' ) lquery = ( lwork == -1_${ik}$ ) .or. ( lrwork == -1_${ik}$ ) if( .not.( upper .or. lower .or. stdlib_lsame( joba, 'G' ) ) ) then info = -1_${ik}$ else if( .not.( lsvec .or. uctol .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -5_${ik}$ else if( lda<m ) then info = -7_${ik}$ else if( mv<0_${ik}$ ) then info = -9_${ik}$ else if( ( rsvec .and. ( ldv<n ) ) .or.( applv .and. ( ldv<mv ) ) ) then info = -11_${ik}$ else if( uctol .and. ( rwork( 1_${ik}$ )<=one ) ) then info = -12_${ik}$ else if( ( lwork<( m+n ) ) .and. ( .not.lquery ) ) then info = -13_${ik}$ else if( ( lrwork<max( n, 6_${ik}$ ) ) .and. ( .not.lquery ) ) then info = -15_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return else if ( lquery ) then cwork(1_${ik}$) = m + n rwork(1_${ik}$) = max( n, 6_${ik}$ ) return end if ! #:) quick return for void matrix if( ( m==0 ) .or. ( n==0 ) )return ! set numerical parameters ! the stopping criterion for jacobi rotations is ! max_{i<>j}|a(:,i)^* * a(:,j)| / (||a(:,i)||*||a(:,j)||) < ctol*eps ! where eps is the round-off and ctol is defined as follows: if( uctol ) then ! ... user controlled ctol = rwork( 1_${ik}$ ) else ! ... default if( lsvec .or. rsvec .or. applv ) then ctol = sqrt( real( m,KIND=${ck}$) ) else ctol = real( m,KIND=${ck}$) end if end if ! ... and the machine dependent parameters are ! [!] (make sure that stdlib${ii}$_dlamch() works properly on the target machine.) epsln = stdlib${ii}$_${c2ri(ci)}$lamch( 'EPSILON' ) rooteps = sqrt( epsln ) sfmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFEMINIMUM' ) rootsfmin = sqrt( sfmin ) small = sfmin / epsln big = stdlib${ii}$_${c2ri(ci)}$lamch( 'OVERFLOW' ) ! big = one / sfmin rootbig = one / rootsfmin ! large = big / sqrt( real( m*n,KIND=${ck}$) ) bigtheta = one / rooteps tol = ctol*epsln roottol = sqrt( tol ) if( real( m,KIND=${ck}$)*epsln>=one ) then info = -4_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if ! initialize the right singular vector matrix. if( rsvec ) then mvl = n call stdlib${ii}$_${ci}$laset( 'A', mvl, n, czero, cone, v, ldv ) else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv ! initialize sva( 1:n ) = ( ||a e_i||_2, i = 1:n ) ! (!) if necessary, scale a to protect the largest singular value ! from overflow. it is possible that saving the largest singular ! value destroys the information about the small ones. ! this initial scaling is almost minimal in the sense that the ! goal is to make sure that no column norm overflows, and that ! sqrt(n)*max_i sva(i) does not overflow. if infinite entries ! in a are detected, the procedure returns with info=-6. skl = one / sqrt( real( m,KIND=${ck}$)*real( n,KIND=${ck}$) ) noscale = .true. goscale = .true. if( lower ) then ! the input matrix is m-by-n lower triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m-p+1, a( p, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else if( upper ) then ! the input matrix is m-by-n upper triangular (trapezoidal) do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ci}$lassq( p, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do else ! the input matrix is m-by-n general dense do p = 1, n aapp = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, aapp, aaqq ) if( aapp>big ) then info = -6_${ik}$ call stdlib${ii}$_xerbla( 'ZGESVJ', -info ) return end if aaqq = sqrt( aaqq ) if( ( aapp<( big / aaqq ) ) .and. noscale ) then sva( p ) = aapp*aaqq else noscale = .false. sva( p ) = aapp*( aaqq*skl ) if( goscale ) then goscale = .false. do q = 1, p - 1 sva( q ) = sva( q )*skl end do end if end if end do end if if( noscale )skl = one ! move the smaller part of the spectrum from the underflow threshold ! (!) start by determining the position of the nonzero entries of the ! array sva() relative to ( sfmin, big ). aapp = zero aaqq = big do p = 1, n if( sva( p )/=zero )aaqq = min( aaqq, sva( p ) ) aapp = max( aapp, sva( p ) ) end do ! #:) quick return for zero matrix if( aapp==zero ) then if( lsvec )call stdlib${ii}$_${ci}$laset( 'G', m, n, czero, cone, a, lda ) rwork( 1_${ik}$ ) = one rwork( 2_${ik}$ ) = zero rwork( 3_${ik}$ ) = zero rwork( 4_${ik}$ ) = zero rwork( 5_${ik}$ ) = zero rwork( 6_${ik}$ ) = zero return end if ! #:) quick return for one-column matrix if( n==1_${ik}$ ) then if( lsvec )call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, sva( 1_${ik}$ ), skl, m, 1_${ik}$,a( 1_${ik}$, 1_${ik}$ ), lda, ierr ) rwork( 1_${ik}$ ) = one / skl if( sva( 1_${ik}$ )>=sfmin ) then rwork( 2_${ik}$ ) = one else rwork( 2_${ik}$ ) = zero end if rwork( 3_${ik}$ ) = zero rwork( 4_${ik}$ ) = zero rwork( 5_${ik}$ ) = zero rwork( 6_${ik}$ ) = zero return end if ! protect small singular values from underflow, and try to ! avoid underflows/overflows in computing jacobi rotations. sn = sqrt( sfmin / epsln ) temp1 = sqrt( big / real( n,KIND=${ck}$) ) if( ( aapp<=sn ) .or. ( aaqq>=temp1 ) .or.( ( sn<=aaqq ) .and. ( aapp<=temp1 ) ) ) & then temp1 = min( big, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp<=temp1 ) ) then temp1 = min( sn / aaqq, big / (aapp*sqrt( real(n,KIND=${ck}$)) ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq>=sn ) .and. ( aapp>=temp1 ) ) then temp1 = max( sn / aaqq, temp1 / aapp ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else if( ( aaqq<=sn ) .and. ( aapp>=temp1 ) ) then temp1 = min( sn / aaqq, big / ( sqrt( real( n,KIND=${ck}$) )*aapp ) ) ! aaqq = aaqq*temp1 ! aapp = aapp*temp1 else temp1 = one end if ! scale, if necessary if( temp1/=one ) then call stdlib${ii}$_${c2ri(ci)}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, temp1, n, 1_${ik}$, sva, n, ierr ) end if skl = temp1*skl if( skl/=one ) then call stdlib${ii}$_${ci}$lascl( joba, 0_${ik}$, 0_${ik}$, one, skl, m, n, a, lda, ierr ) skl = one / skl end if ! row-cyclic jacobi svd algorithm with column pivoting emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ do q = 1, n cwork( q ) = cone end do swband = 3_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_${ci}$gesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_${ci}$gejsv. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. if( ( lower .or. upper ) .and. ( n>max( 64_${ik}$, 4_${ik}$*kbl ) ) ) then ! [tp] the number of partition levels and the actual partition are ! tuning parameters. n4 = n / 4_${ik}$ n2 = n / 2_${ik}$ n34 = 3_${ik}$*n4 if( applv ) then q = 0_${ik}$ else q = 1_${ik}$ end if if( lower ) then ! this works very well on lower triangular matrices, in particular ! in the framework of the preconditioned jacobi svd (xgejsv). ! the idea is simple: ! [+ 0 0 0] note that jacobi transformations of [0 0] ! [+ + 0 0] [0 0] ! [+ + x 0] actually work on [x 0] [x 0] ! [+ + x x] [x x]. [x x] call stdlib${ii}$_${ci}$gsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,cwork( n34+1 ), & sva( n34+1 ), mvl,v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,2_${ik}$, cwork( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_${ci}$gsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,cwork( n2+1 ), sva( & n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2_${ik}$,cwork( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_${ci}$gsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,cwork( n2+1 ), & sva( n2+1 ), mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), & lwork-n, ierr ) call stdlib${ii}$_${ci}$gsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,cwork( n4+1 ), sva( & n4+1 ), mvl,v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), lwork-n, & ierr ) call stdlib${ii}$_${ci}$gsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & tol, 1_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_${ci}$gsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, cwork( n+1 ),lwork-n, ierr ) else if( upper ) then call stdlib${ii}$_${ci}$gsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,epsln, sfmin, & tol, 2_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_${ci}$gsvj0( jobv, n2, n4, a( 1_${ik}$, n4+1 ), lda, cwork( n4+1 ),sva( n4+1 ), & mvl, v( n4*q+1, n4+1 ), ldv,epsln, sfmin, tol, 1_${ik}$, cwork( n+1 ), lwork-n,ierr ) call stdlib${ii}$_${ci}$gsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,ldv, epsln, & sfmin, tol, 1_${ik}$, cwork( n+1 ),lwork-n, ierr ) call stdlib${ii}$_${ci}$gsvj0( jobv, n2+n4, n4, a( 1_${ik}$, n2+1 ), lda,cwork( n2+1 ), sva( n2+1 )& , mvl,v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1_${ik}$,cwork( n+1 ), lwork-n, ierr ) end if end if ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = cwork(p) cwork(p) = cwork(q) cwork(q) = aapq end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! [!] caveat: ! unfortunately, some blas implementations compute stdlib${ii}$_${c2ri(ci)}$znrm2(m,a(1,p),1) ! as sqrt(s=stdlib${ii}$_zdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_${c2ri(ci)}$znrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_dcnrm2 is available, the if-then-else-end if ! below should be replaced with "aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a(1,p), 1 )". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else temp1 = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aapp ) / aaqq else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, a(1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg( cwork(p) ) * cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq1 if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if ( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if ( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if cwork(p) = -cwork(q) * ompq else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, cwork(n+1), & lda,ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) call stdlib${ii}$_${ci}$axpy( m, -aapq, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$ ) else t = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, cwork(n+1), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,cwork(n+1), & lda, ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if cwork(p) = -cwork(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, cwork(n+1)& ,lda,ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_${ci}$axpy( m, -aapq, cwork(n+1),1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,cwork(n+1), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, cwork(n+1)& ,lda,ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_${ci}$axpy( m, -conjg(aapq),cwork(n+1), 1_${ik}$, a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=${ck}$) )*tol ) .and. ( real( n,& KIND=${ck}$)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the singular values and find how many are above ! the underflow threshold. n2 = 0_${ik}$ n4 = 0_${ik}$ do p = 1, n - 1 q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if if( sva( p )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( p )*skl>sfmin )n2 = n2 + 1_${ik}$ end if end do if( sva( n )/=zero ) then n4 = n4 + 1_${ik}$ if( sva( n )*skl>sfmin )n2 = n2 + 1_${ik}$ end if ! normalize the left singular vectors. if( lsvec .or. uctol ) then do p = 1, n4 ! call stdlib${ii}$_${ci}$dscal( m, one / sva( p ), a( 1, p ), 1 ) call stdlib${ii}$_${ci}$lascl( 'G',0_${ik}$,0_${ik}$, sva(p), one, m, 1_${ik}$, a(1_${ik}$,p), m, ierr ) end do end if ! scale the product of jacobi rotations. if( rsvec ) then do p = 1, n temp1 = one / stdlib${ii}$_${c2ri(ci)}$znrm2( mvl, v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ci}$dscal( mvl, temp1, v( 1_${ik}$, p ), 1_${ik}$ ) end do end if ! undo scaling, if necessary (and possible). if( ( ( skl>one ) .and. ( sva( 1_${ik}$ )<( big / skl ) ) ).or. ( ( skl<one ) .and. ( sva( & max( n2, 1_${ik}$ ) ) >( sfmin / skl ) ) ) ) then do p = 1, n sva( p ) = skl*sva( p ) end do skl = one end if rwork( 1_${ik}$ ) = skl ! the singular values of a are skl*sva(1:n). if skl/=one ! then some of the singular values may overflow or underflow and ! the spectrum is given in this factored representation. rwork( 2_${ik}$ ) = real( n4,KIND=${ck}$) ! n4 is the number of computed nonzero singular values of a. rwork( 3_${ik}$ ) = real( n2,KIND=${ck}$) ! n2 is the number of singular values of a greater than sfmin. ! if n2<n, sva(n2:n) contains zeros and/or denormalized numbers ! that may carry some information. rwork( 4_${ik}$ ) = real( i,KIND=${ck}$) ! i is the index of the last sweep before declaring convergence. rwork( 5_${ik}$ ) = mxaapq ! mxaapq is the largest absolute value of scaled pivots in the ! last sweep rwork( 6_${ik}$ ) = mxsinj ! mxsinj is the largest absolute value of the sines of jacobi angles ! in the last sweep return end subroutine stdlib${ii}$_${ci}$gesvj #:endif #:endfor #:endfor end submodule stdlib_lapack_eigv_svd_drivers2