#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_bidiag_dc implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES pure module subroutine stdlib${ii}$_slasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) !! Using a divide and conquer approach, SLASD0: computes the singular !! value decomposition (SVD) of a real upper bidiagonal N-by-M !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE. !! The algorithm computes orthogonal matrices U and VT such that !! B = U * S * VT. The singular values S are overwritten on D. !! A related subroutine, SLASDA, computes only the singular values, !! and optionally, the singular vectors in compact form. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(inout) :: d(*), e(*) real(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,& nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei real(sp) :: alpha, beta ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( n<0_${ik}$ ) then info = -1_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -2_${ik}$ end if m = n + sqre if( ldu<n ) then info = -6_${ik}$ else if( ldvt<m ) then info = -8_${ik}$ else if( smlsiz<3_${ik}$ ) then info = -9_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD0', -info ) return end if ! if the input matrix is too small, call stdlib${ii}$_slasdq to find the svd. if( n<=smlsiz ) then call stdlib${ii}$_slasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, & info ) return end if ! set up the computation tree. inode = 1_${ik}$ ndiml = inode + n ndimr = ndiml + n idxq = ndimr + n iwk = idxq + n call stdlib${ii}$_slasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz & ) ! for the nodes on bottom level of the tree, solve ! their subproblems by stdlib${ii}$_slasdq. ndb1 = ( nd+1 ) / 2_${ik}$ ncc = 0_${ik}$ loop_30: do i = ndb1, nd ! ic : center row of each node ! nl : number of rows of left subproblem ! nr : number of rows of right subproblem ! nlf: starting row of the left subproblem ! nrf: starting row of the right subproblem i1 = i - 1_${ik}$ ic = iwork( inode+i1 ) nl = iwork( ndiml+i1 ) nlp1 = nl + 1_${ik}$ nr = iwork( ndimr+i1 ) nrp1 = nr + 1_${ik}$ nlf = ic - nl nrf = ic + 1_${ik}$ sqrei = 1_${ik}$ call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )& , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info ) if( info/=0_${ik}$ ) then return end if itemp = idxq + nlf - 2_${ik}$ do j = 1, nl iwork( itemp+j ) = j end do if( i==nd ) then sqrei = sqre else sqrei = 1_${ik}$ end if nrp1 = nr + sqrei call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )& , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info ) if( info/=0_${ik}$ ) then return end if itemp = idxq + ic do j = 1, nr iwork( itemp+j-1 ) = j end do end do loop_30 ! now conquer each subproblem bottom-up. loop_50: do lvl = nlvl, 1, -1 ! find the first node lf and last node ll on the ! current level lvl. if( lvl==1_${ik}$ ) then lf = 1_${ik}$ ll = 1_${ik}$ else lf = 2_${ik}$**( lvl-1 ) ll = 2_${ik}$*lf - 1_${ik}$ end if do i = lf, ll im1 = i - 1_${ik}$ ic = iwork( inode+im1 ) nl = iwork( ndiml+im1 ) nr = iwork( ndimr+im1 ) nlf = ic - nl if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then sqrei = sqre else sqrei = 1_${ik}$ end if idxqc = idxq + nlf - 1_${ik}$ alpha = d( ic ) beta = e( ic ) call stdlib${ii}$_slasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( & nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if end do end do loop_50 return end subroutine stdlib${ii}$_slasd0 pure module subroutine stdlib${ii}$_dlasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) !! Using a divide and conquer approach, DLASD0: computes the singular !! value decomposition (SVD) of a real upper bidiagonal N-by-M !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE. !! The algorithm computes orthogonal matrices U and VT such that !! B = U * S * VT. The singular values S are overwritten on D. !! A related subroutine, DLASDA, computes only the singular values, !! and optionally, the singular vectors in compact form. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(inout) :: d(*), e(*) real(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,& nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei real(dp) :: alpha, beta ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( n<0_${ik}$ ) then info = -1_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -2_${ik}$ end if m = n + sqre if( ldu<n ) then info = -6_${ik}$ else if( ldvt<m ) then info = -8_${ik}$ else if( smlsiz<3_${ik}$ ) then info = -9_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD0', -info ) return end if ! if the input matrix is too small, call stdlib${ii}$_dlasdq to find the svd. if( n<=smlsiz ) then call stdlib${ii}$_dlasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, & info ) return end if ! set up the computation tree. inode = 1_${ik}$ ndiml = inode + n ndimr = ndiml + n idxq = ndimr + n iwk = idxq + n call stdlib${ii}$_dlasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz & ) ! for the nodes on bottom level of the tree, solve ! their subproblems by stdlib${ii}$_dlasdq. ndb1 = ( nd+1 ) / 2_${ik}$ ncc = 0_${ik}$ loop_30: do i = ndb1, nd ! ic : center row of each node ! nl : number of rows of left subproblem ! nr : number of rows of right subproblem ! nlf: starting row of the left subproblem ! nrf: starting row of the right subproblem i1 = i - 1_${ik}$ ic = iwork( inode+i1 ) nl = iwork( ndiml+i1 ) nlp1 = nl + 1_${ik}$ nr = iwork( ndimr+i1 ) nrp1 = nr + 1_${ik}$ nlf = ic - nl nrf = ic + 1_${ik}$ sqrei = 1_${ik}$ call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )& , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info ) if( info/=0_${ik}$ ) then return end if itemp = idxq + nlf - 2_${ik}$ do j = 1, nl iwork( itemp+j ) = j end do if( i==nd ) then sqrei = sqre else sqrei = 1_${ik}$ end if nrp1 = nr + sqrei call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )& , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info ) if( info/=0_${ik}$ ) then return end if itemp = idxq + ic do j = 1, nr iwork( itemp+j-1 ) = j end do end do loop_30 ! now conquer each subproblem bottom-up. loop_50: do lvl = nlvl, 1, -1 ! find the first node lf and last node ll on the ! current level lvl. if( lvl==1_${ik}$ ) then lf = 1_${ik}$ ll = 1_${ik}$ else lf = 2_${ik}$**( lvl-1 ) ll = 2_${ik}$*lf - 1_${ik}$ end if do i = lf, ll im1 = i - 1_${ik}$ ic = iwork( inode+im1 ) nl = iwork( ndiml+im1 ) nr = iwork( ndimr+im1 ) nlf = ic - nl if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then sqrei = sqre else sqrei = 1_${ik}$ end if idxqc = idxq + nlf - 1_${ik}$ alpha = d( ic ) beta = e( ic ) call stdlib${ii}$_dlasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( & nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if end do end do loop_50 return end subroutine stdlib${ii}$_dlasd0 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info ) !! Using a divide and conquer approach, DLASD0: computes the singular !! value decomposition (SVD) of a real upper bidiagonal N-by-M !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE. !! The algorithm computes orthogonal matrices U and VT such that !! B = U * S * VT. The singular values S are overwritten on D. !! A related subroutine, DLASDA, computes only the singular values, !! and optionally, the singular vectors in compact form. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${rk}$), intent(inout) :: d(*), e(*) real(${rk}$), intent(out) :: u(ldu,*), vt(ldvt,*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,& nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei real(${rk}$) :: alpha, beta ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( n<0_${ik}$ ) then info = -1_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -2_${ik}$ end if m = n + sqre if( ldu<n ) then info = -6_${ik}$ else if( ldvt<m ) then info = -8_${ik}$ else if( smlsiz<3_${ik}$ ) then info = -9_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD0', -info ) return end if ! if the input matrix is too small, call stdlib${ii}$_${ri}$lasdq to find the svd. if( n<=smlsiz ) then call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, & info ) return end if ! set up the computation tree. inode = 1_${ik}$ ndiml = inode + n ndimr = ndiml + n idxq = ndimr + n iwk = idxq + n call stdlib${ii}$_${ri}$lasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz & ) ! for the nodes on bottom level of the tree, solve ! their subproblems by stdlib${ii}$_${ri}$lasdq. ndb1 = ( nd+1 ) / 2_${ik}$ ncc = 0_${ik}$ loop_30: do i = ndb1, nd ! ic : center row of each node ! nl : number of rows of left subproblem ! nr : number of rows of right subproblem ! nlf: starting row of the left subproblem ! nrf: starting row of the right subproblem i1 = i - 1_${ik}$ ic = iwork( inode+i1 ) nl = iwork( ndiml+i1 ) nlp1 = nl + 1_${ik}$ nr = iwork( ndimr+i1 ) nrp1 = nr + 1_${ik}$ nlf = ic - nl nrf = ic + 1_${ik}$ sqrei = 1_${ik}$ call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )& , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info ) if( info/=0_${ik}$ ) then return end if itemp = idxq + nlf - 2_${ik}$ do j = 1, nl iwork( itemp+j ) = j end do if( i==nd ) then sqrei = sqre else sqrei = 1_${ik}$ end if nrp1 = nr + sqrei call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )& , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info ) if( info/=0_${ik}$ ) then return end if itemp = idxq + ic do j = 1, nr iwork( itemp+j-1 ) = j end do end do loop_30 ! now conquer each subproblem bottom-up. loop_50: do lvl = nlvl, 1, -1 ! find the first node lf and last node ll on the ! current level lvl. if( lvl==1_${ik}$ ) then lf = 1_${ik}$ ll = 1_${ik}$ else lf = 2_${ik}$**( lvl-1 ) ll = 2_${ik}$*lf - 1_${ik}$ end if do i = lf, ll im1 = i - 1_${ik}$ ic = iwork( inode+im1 ) nl = iwork( ndiml+im1 ) nr = iwork( ndimr+im1 ) nlf = ic - nl if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then sqrei = sqre else sqrei = 1_${ik}$ end if idxqc = idxq + nlf - 1_${ik}$ alpha = d( ic ) beta = e( ic ) call stdlib${ii}$_${ri}$lasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( & nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if end do end do loop_50 return end subroutine stdlib${ii}$_${ri}$lasd0 #:endif #:endfor pure module subroutine stdlib${ii}$_slasdt( n, lvl, nd, inode, ndiml, ndimr, msub ) !! SLASDT creates a tree of subproblems for bidiagonal divide and !! conquer. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: lvl, nd integer(${ik}$), intent(in) :: msub, n ! Array Arguments integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl real(sp) :: temp ! Intrinsic Functions ! Executable Statements ! find the number of levels on the tree. maxn = max( 1_${ik}$, n ) temp = log( real( maxn,KIND=sp) / real( msub+1,KIND=sp) ) / log( two ) lvl = int( temp,KIND=${ik}$) + 1_${ik}$ i = n / 2_${ik}$ inode( 1_${ik}$ ) = i + 1_${ik}$ ndiml( 1_${ik}$ ) = i ndimr( 1_${ik}$ ) = n - i - 1_${ik}$ il = 0_${ik}$ ir = 1_${ik}$ llst = 1_${ik}$ do nlvl = 1, lvl - 1 ! constructing the tree at (nlvl+1)-st level. the number of ! nodes created on this level is llst * 2. do i = 0, llst - 1 il = il + 2_${ik}$ ir = ir + 2_${ik}$ ncrnt = llst + i ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$ ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$ inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$ ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$ ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$ inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$ end do llst = llst*2_${ik}$ end do nd = llst*2_${ik}$ - 1_${ik}$ return end subroutine stdlib${ii}$_slasdt pure module subroutine stdlib${ii}$_dlasdt( n, lvl, nd, inode, ndiml, ndimr, msub ) !! DLASDT creates a tree of subproblems for bidiagonal divide and !! conquer. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: lvl, nd integer(${ik}$), intent(in) :: msub, n ! Array Arguments integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl real(dp) :: temp ! Intrinsic Functions ! Executable Statements ! find the number of levels on the tree. maxn = max( 1_${ik}$, n ) temp = log( real( maxn,KIND=dp) / real( msub+1,KIND=dp) ) / log( two ) lvl = int( temp,KIND=${ik}$) + 1_${ik}$ i = n / 2_${ik}$ inode( 1_${ik}$ ) = i + 1_${ik}$ ndiml( 1_${ik}$ ) = i ndimr( 1_${ik}$ ) = n - i - 1_${ik}$ il = 0_${ik}$ ir = 1_${ik}$ llst = 1_${ik}$ do nlvl = 1, lvl - 1 ! constructing the tree at (nlvl+1)-st level. the number of ! nodes created on this level is llst * 2. do i = 0, llst - 1 il = il + 2_${ik}$ ir = ir + 2_${ik}$ ncrnt = llst + i ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$ ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$ inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$ ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$ ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$ inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$ end do llst = llst*2_${ik}$ end do nd = llst*2_${ik}$ - 1_${ik}$ return end subroutine stdlib${ii}$_dlasdt #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasdt( n, lvl, nd, inode, ndiml, ndimr, msub ) !! DLASDT: creates a tree of subproblems for bidiagonal divide and !! conquer. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: lvl, nd integer(${ik}$), intent(in) :: msub, n ! Array Arguments integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl real(${rk}$) :: temp ! Intrinsic Functions ! Executable Statements ! find the number of levels on the tree. maxn = max( 1_${ik}$, n ) temp = log( real( maxn,KIND=${rk}$) / real( msub+1,KIND=${rk}$) ) / log( two ) lvl = int( temp,KIND=${ik}$) + 1_${ik}$ i = n / 2_${ik}$ inode( 1_${ik}$ ) = i + 1_${ik}$ ndiml( 1_${ik}$ ) = i ndimr( 1_${ik}$ ) = n - i - 1_${ik}$ il = 0_${ik}$ ir = 1_${ik}$ llst = 1_${ik}$ do nlvl = 1, lvl - 1 ! constructing the tree at (nlvl+1)-st level. the number of ! nodes created on this level is llst * 2. do i = 0, llst - 1 il = il + 2_${ik}$ ir = ir + 2_${ik}$ ncrnt = llst + i ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$ ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$ inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$ ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$ ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$ inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$ end do llst = llst*2_${ik}$ end do nd = llst*2_${ik}$ - 1_${ik}$ return end subroutine stdlib${ii}$_${ri}$lasdt #:endif #:endfor pure module subroutine stdlib${ii}$_slasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, & !! SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, !! where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. !! A related subroutine SLASD7 handles the case in which the singular !! values (and the singular vectors in factored form) are desired. !! SLASD1 computes the SVD as follows: !! ( D1(in) 0 0 0 ) !! B = U(in) * ( Z1**T a Z2**T b ) * VT(in) !! ( 0 0 D2(in) 0 ) !! = U(out) * ( D(out) 0) * VT(out) !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros !! elsewhere; and the entry b is empty if SQRE = 0. !! The left singular vectors of the original matrix are stored in U, and !! the transpose of the right singular vectors are stored in VT, and the !! singular values are in D. The algorithm consists of three stages: !! The first stage consists of deflating the size of the problem !! when there are multiple singular values or when there are zeros in !! the Z vector. For each such occurrence the dimension of the !! secular equation problem is reduced by one. This stage is !! performed by the routine SLASD2. !! The second stage consists of calculating the updated !! singular values. This is done by finding the square roots of the !! roots of the secular equation via the routine SLASD4 (as called !! by SLASD3). This routine also calculates the singular vectors of !! the current problem. !! The final stage consists of computing the updated singular vectors !! directly using the updated singular values. The singular vectors !! for the current problem are multiplied with the singular vectors !! from the overall problem. work, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre real(sp), intent(inout) :: alpha, beta ! Array Arguments integer(${ik}$), intent(inout) :: idxq(*) integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, & ldvt2, m, n, n1, n2 real(sp) :: orgnrm ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -3_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD1', -info ) return end if n = nl + nr + 1_${ik}$ m = n + sqre ! the following values are for bookkeeping purposes only. they are ! integer pointers which indicate the portion of the workspace ! used by a particular array in stdlib${ii}$_slasd2 and stdlib${ii}$_slasd3. ldu2 = n ldvt2 = m iz = 1_${ik}$ isigma = iz + m iu2 = isigma + n ivt2 = iu2 + ldu2*n iq = ivt2 + ldvt2*m idx = 1_${ik}$ idxc = idx + n coltyp = idxc + n idxp = coltyp + n ! scale. orgnrm = max( abs( alpha ), abs( beta ) ) d( nl+1 ) = zero do i = 1, n if( abs( d( i ) )>orgnrm ) then orgnrm = abs( d( i ) ) end if end do call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info ) alpha = alpha / orgnrm beta = beta / orgnrm ! deflate singular values. call stdlib${ii}$_slasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(& isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( & idxc ), idxq, iwork( coltyp ), info ) ! solve secular equation and update singular vectors. ldq = k call stdlib${ii}$_slasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( & iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),& info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if ! unscale. call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info ) ! prepare the idxq sorting permutation. n1 = k n2 = n - k call stdlib${ii}$_slamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq ) return end subroutine stdlib${ii}$_slasd1 pure module subroutine stdlib${ii}$_dlasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, & !! DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, !! where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. !! A related subroutine DLASD7 handles the case in which the singular !! values (and the singular vectors in factored form) are desired. !! DLASD1 computes the SVD as follows: !! ( D1(in) 0 0 0 ) !! B = U(in) * ( Z1**T a Z2**T b ) * VT(in) !! ( 0 0 D2(in) 0 ) !! = U(out) * ( D(out) 0) * VT(out) !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros !! elsewhere; and the entry b is empty if SQRE = 0. !! The left singular vectors of the original matrix are stored in U, and !! the transpose of the right singular vectors are stored in VT, and the !! singular values are in D. The algorithm consists of three stages: !! The first stage consists of deflating the size of the problem !! when there are multiple singular values or when there are zeros in !! the Z vector. For each such occurrence the dimension of the !! secular equation problem is reduced by one. This stage is !! performed by the routine DLASD2. !! The second stage consists of calculating the updated !! singular values. This is done by finding the square roots of the !! roots of the secular equation via the routine DLASD4 (as called !! by DLASD3). This routine also calculates the singular vectors of !! the current problem. !! The final stage consists of computing the updated singular vectors !! directly using the updated singular values. The singular vectors !! for the current problem are multiplied with the singular vectors !! from the overall problem. work, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre real(dp), intent(inout) :: alpha, beta ! Array Arguments integer(${ik}$), intent(inout) :: idxq(*) integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, & ldvt2, m, n, n1, n2 real(dp) :: orgnrm ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -3_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD1', -info ) return end if n = nl + nr + 1_${ik}$ m = n + sqre ! the following values are for bookkeeping purposes only. they are ! integer pointers which indicate the portion of the workspace ! used by a particular array in stdlib${ii}$_dlasd2 and stdlib${ii}$_dlasd3. ldu2 = n ldvt2 = m iz = 1_${ik}$ isigma = iz + m iu2 = isigma + n ivt2 = iu2 + ldu2*n iq = ivt2 + ldvt2*m idx = 1_${ik}$ idxc = idx + n coltyp = idxc + n idxp = coltyp + n ! scale. orgnrm = max( abs( alpha ), abs( beta ) ) d( nl+1 ) = zero do i = 1, n if( abs( d( i ) )>orgnrm ) then orgnrm = abs( d( i ) ) end if end do call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info ) alpha = alpha / orgnrm beta = beta / orgnrm ! deflate singular values. call stdlib${ii}$_dlasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(& isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( & idxc ), idxq, iwork( coltyp ), info ) ! solve secular equation and update singular vectors. ldq = k call stdlib${ii}$_dlasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( & iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),& info ) ! report the convergence failure. if( info/=0_${ik}$ ) then return end if ! unscale. call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info ) ! prepare the idxq sorting permutation. n1 = k n2 = n - k call stdlib${ii}$_dlamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq ) return end subroutine stdlib${ii}$_dlasd1 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, & !! DLASD1: computes the SVD of an upper bidiagonal N-by-M matrix B, !! where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. !! A related subroutine DLASD7 handles the case in which the singular !! values (and the singular vectors in factored form) are desired. !! DLASD1 computes the SVD as follows: !! ( D1(in) 0 0 0 ) !! B = U(in) * ( Z1**T a Z2**T b ) * VT(in) !! ( 0 0 D2(in) 0 ) !! = U(out) * ( D(out) 0) * VT(out) !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros !! elsewhere; and the entry b is empty if SQRE = 0. !! The left singular vectors of the original matrix are stored in U, and !! the transpose of the right singular vectors are stored in VT, and the !! singular values are in D. The algorithm consists of three stages: !! The first stage consists of deflating the size of the problem !! when there are multiple singular values or when there are zeros in !! the Z vector. For each such occurrence the dimension of the !! secular equation problem is reduced by one. This stage is !! performed by the routine DLASD2. !! The second stage consists of calculating the updated !! singular values. This is done by finding the square roots of the !! roots of the secular equation via the routine DLASD4 (as called !! by DLASD3). This routine also calculates the singular vectors of !! the current problem. !! The final stage consists of computing the updated singular vectors !! directly using the updated singular values. The singular vectors !! for the current problem are multiplied with the singular vectors !! from the overall problem. work, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre real(${rk}$), intent(inout) :: alpha, beta ! Array Arguments integer(${ik}$), intent(inout) :: idxq(*) integer(${ik}$), intent(out) :: iwork(*) real(${rk}$), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, & ldvt2, m, n, n1, n2 real(${rk}$) :: orgnrm ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -3_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD1', -info ) return end if n = nl + nr + 1_${ik}$ m = n + sqre ! the following values are for bookkeeping purposes only. they are ! integer pointers which indicate the portion of the workspace ! used by a particular array in stdlib${ii}$_${ri}$lasd2 and stdlib${ii}$_${ri}$lasd3. ldu2 = n ldvt2 = m iz = 1_${ik}$ isigma = iz + m iu2 = isigma + n ivt2 = iu2 + ldu2*n iq = ivt2 + ldvt2*m idx = 1_${ik}$ idxc = idx + n coltyp = idxc + n idxp = coltyp + n ! scale. orgnrm = max( abs( alpha ), abs( beta ) ) d( nl+1 ) = zero do i = 1, n if( abs( d( i ) )>orgnrm ) then orgnrm = abs( d( i ) ) end if end do call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info ) alpha = alpha / orgnrm beta = beta / orgnrm ! deflate singular values. call stdlib${ii}$_${ri}$lasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(& isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( & idxc ), idxq, iwork( coltyp ), info ) ! solve secular equation and update singular vectors. ldq = k call stdlib${ii}$_${ri}$lasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( & iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),& info ) ! report the convergence failure. if( info/=0_${ik}$ ) then return end if ! unscale. call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info ) ! prepare the idxq sorting permutation. n1 = k n2 = n - k call stdlib${ii}$_${ri}$lamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq ) return end subroutine stdlib${ii}$_${ri}$lasd1 #:endif #:endfor pure module subroutine stdlib${ii}$_slasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, & !! SLASD2 merges the two sets of singular values together into a single !! sorted set. Then it tries to deflate the size of the problem. !! There are two ways in which deflation can occur: when two or more !! singular values are close together or if there is a tiny entry in the !! Z vector. For each such occurrence the order of the related secular !! equation problem is reduced by one. !! SLASD2 is called from SLASD1. u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, k integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre real(sp), intent(in) :: alpha, beta ! Array Arguments integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*) integer(${ik}$), intent(inout) :: idxq(*) real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) real(sp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*) ! ===================================================================== ! Local Arrays integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$) ! Local Scalars integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 real(sp) :: c, eps, hlftol, s, tau, tol, z1 ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then info = -3_${ik}$ end if n = nl + nr + 1_${ik}$ m = n + sqre if( ldu<n ) then info = -10_${ik}$ else if( ldvt<m ) then info = -12_${ik}$ else if( ldu2<n ) then info = -15_${ik}$ else if( ldvt2<m ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD2', -info ) return end if nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ ! generate the first part of the vector z; and move the singular ! values in the first part of d one position backward. z1 = alpha*vt( nlp1, nlp1 ) z( 1_${ik}$ ) = z1 do i = nl, 1, -1 z( i+1 ) = alpha*vt( i, nlp1 ) d( i+1 ) = d( i ) idxq( i+1 ) = idxq( i ) + 1_${ik}$ end do ! generate the second part of the vector z. do i = nlp2, m z( i ) = beta*vt( i, nlp2 ) end do ! initialize some reference arrays. do i = 2, nlp1 coltyp( i ) = 1_${ik}$ end do do i = nlp2, n coltyp( i ) = 2_${ik}$ end do ! sort the singular values into increasing order do i = nlp2, n idxq( i ) = idxq( i ) + nlp1 end do ! dsigma, idxc, idxc, and the first column of u2 ! are used as storage space. do i = 2, n dsigma( i ) = d( idxq( i ) ) u2( i, 1_${ik}$ ) = z( idxq( i ) ) idxc( i ) = coltyp( idxq( i ) ) end do call stdlib${ii}$_slamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) ) do i = 2, n idxi = 1_${ik}$ + idx( i ) d( i ) = dsigma( idxi ) z( i ) = u2( idxi, 1_${ik}$ ) coltyp( i ) = idxc( idxi ) end do ! calculate the allowable deflation tolerance eps = stdlib${ii}$_slamch( 'EPSILON' ) tol = max( abs( alpha ), abs( beta ) ) tol = eight*eps*max( abs( d( n ) ), tol ) ! there are 2 kinds of deflation -- first a value in the z-vector ! is small, second two (or more) singular values are very close ! together (their difference is small). ! if the value in the z-vector is small, we simply permute the ! array so that the corresponding singular value is moved to the ! end. ! if two values in the d-vector are close, we perform a two-sided ! rotation designed to make one of the corresponding z-vector ! entries zero, and then permute the array so that the deflated ! singular value is moved to the end. ! if there are multiple singular values then the problem deflates. ! here the number of equal singular values are found. as each equal ! singular value is found, an elementary reflector is computed to ! rotate the corresponding singular subspace so that the ! corresponding components of z are zero in this new basis. k = 1_${ik}$ k2 = n + 1_${ik}$ do j = 2, n if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j coltyp( j ) = 4_${ik}$ if( j==n )go to 120 else jprev = j go to 90 end if end do 90 continue j = jprev 100 continue j = j + 1_${ik}$ if( j>n )go to 110 if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j coltyp( j ) = 4_${ik}$ else ! check if singular values are close enough to allow deflation. if( abs( d( j )-d( jprev ) )<=tol ) then ! deflation is possible. s = z( jprev ) c = z( j ) ! find sqrt(a**2+b**2) without overflow or ! destructive underflow. tau = stdlib${ii}$_slapy2( c, s ) c = c / tau s = -s / tau z( j ) = tau z( jprev ) = zero ! apply back the givens rotation to the left and right ! singular vector matrices. idxjp = idxq( idx( jprev )+1_${ik}$ ) idxj = idxq( idx( j )+1_${ik}$ ) if( idxjp<=nlp1 ) then idxjp = idxjp - 1_${ik}$ end if if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if call stdlib${ii}$_srot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s ) call stdlib${ii}$_srot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s ) if( coltyp( j )/=coltyp( jprev ) ) then coltyp( j ) = 3_${ik}$ end if coltyp( jprev ) = 4_${ik}$ k2 = k2 - 1_${ik}$ idxp( k2 ) = jprev jprev = j else k = k + 1_${ik}$ u2( k, 1_${ik}$ ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev jprev = j end if end if go to 100 110 continue ! record the last singular value. k = k + 1_${ik}$ u2( k, 1_${ik}$ ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev 120 continue ! count up the total number of the various types of columns, then ! form a permutation which positions the four column types into ! four groups of uniform structure (although one or more of these ! groups may be empty). do j = 1, 4 ctot( j ) = 0_${ik}$ end do do j = 2, n ct = coltyp( j ) ctot( ct ) = ctot( ct ) + 1_${ik}$ end do ! psm(*) = position in submatrix (of types 1 through 4) psm( 1_${ik}$ ) = 2_${ik}$ psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ ) psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ ) psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ ) ! fill out the idxc array so that the permutation which it induces ! will place all type-1 columns first, all type-2 columns next, ! then all type-3's, and finally all type-4's, starting from the ! second column. this applies similarly to the rows of vt. do j = 2, n jp = idxp( j ) ct = coltyp( jp ) idxc( psm( ct ) ) = j psm( ct ) = psm( ct ) + 1_${ik}$ end do ! sort the singular values and corresponding singular vectors into ! dsigma, u2, and vt2 respectively. the singular values/vectors ! which were not deflated go into the first k slots of dsigma, u2, ! and vt2 respectively, while those which were deflated go into the ! last n - k slots, except that the first column/row will be treated ! separately. do j = 2, n jp = idxp( j ) dsigma( j ) = d( jp ) idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ ) if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if call stdlib${ii}$_scopy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ ) call stdlib${ii}$_scopy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 ) end do ! determine dsigma(1), dsigma(2) and z(1) dsigma( 1_${ik}$ ) = zero hlftol = tol / two if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol if( m>n ) then z( 1_${ik}$ ) = stdlib${ii}$_slapy2( z1, z( m ) ) if( z( 1_${ik}$ )<=tol ) then c = one s = zero z( 1_${ik}$ ) = tol else c = z1 / z( 1_${ik}$ ) s = z( m ) / z( 1_${ik}$ ) end if else if( abs( z1 )<=tol ) then z( 1_${ik}$ ) = tol else z( 1_${ik}$ ) = z1 end if end if ! move the rest of the updating row to z. call stdlib${ii}$_scopy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ ) ! determine the first column of u2, the first row of vt2 and the ! last row of vt. call stdlib${ii}$_slaset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 ) u2( nlp1, 1_${ik}$ ) = one if( m>n ) then do i = 1, nlp1 vt( m, i ) = -s*vt( nlp1, i ) vt2( 1_${ik}$, i ) = c*vt( nlp1, i ) end do do i = nlp2, m vt2( 1_${ik}$, i ) = s*vt( m, i ) vt( m, i ) = c*vt( m, i ) end do else call stdlib${ii}$_scopy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 ) end if if( m>n ) then call stdlib${ii}$_scopy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 ) end if ! the deflated singular values and their corresponding vectors go ! into the back of d, u, and v respectively. if( n>k ) then call stdlib${ii}$_scopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ ) call stdlib${ii}$_slacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu ) call stdlib${ii}$_slacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt ) end if ! copy ctot into coltyp for referencing in stdlib${ii}$_slasd3. do j = 1, 4 coltyp( j ) = ctot( j ) end do return end subroutine stdlib${ii}$_slasd2 pure module subroutine stdlib${ii}$_dlasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, & !! DLASD2 merges the two sets of singular values together into a single !! sorted set. Then it tries to deflate the size of the problem. !! There are two ways in which deflation can occur: when two or more !! singular values are close together or if there is a tiny entry in the !! Z vector. For each such occurrence the order of the related secular !! equation problem is reduced by one. !! DLASD2 is called from DLASD1. u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, k integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre real(dp), intent(in) :: alpha, beta ! Array Arguments integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*) integer(${ik}$), intent(inout) :: idxq(*) real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) real(dp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*) ! ===================================================================== ! Local Arrays integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$) ! Local Scalars integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 real(dp) :: c, eps, hlftol, s, tau, tol, z1 ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then info = -3_${ik}$ end if n = nl + nr + 1_${ik}$ m = n + sqre if( ldu<n ) then info = -10_${ik}$ else if( ldvt<m ) then info = -12_${ik}$ else if( ldu2<n ) then info = -15_${ik}$ else if( ldvt2<m ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD2', -info ) return end if nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ ! generate the first part of the vector z; and move the singular ! values in the first part of d one position backward. z1 = alpha*vt( nlp1, nlp1 ) z( 1_${ik}$ ) = z1 do i = nl, 1, -1 z( i+1 ) = alpha*vt( i, nlp1 ) d( i+1 ) = d( i ) idxq( i+1 ) = idxq( i ) + 1_${ik}$ end do ! generate the second part of the vector z. do i = nlp2, m z( i ) = beta*vt( i, nlp2 ) end do ! initialize some reference arrays. do i = 2, nlp1 coltyp( i ) = 1_${ik}$ end do do i = nlp2, n coltyp( i ) = 2_${ik}$ end do ! sort the singular values into increasing order do i = nlp2, n idxq( i ) = idxq( i ) + nlp1 end do ! dsigma, idxc, idxc, and the first column of u2 ! are used as storage space. do i = 2, n dsigma( i ) = d( idxq( i ) ) u2( i, 1_${ik}$ ) = z( idxq( i ) ) idxc( i ) = coltyp( idxq( i ) ) end do call stdlib${ii}$_dlamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) ) do i = 2, n idxi = 1_${ik}$ + idx( i ) d( i ) = dsigma( idxi ) z( i ) = u2( idxi, 1_${ik}$ ) coltyp( i ) = idxc( idxi ) end do ! calculate the allowable deflation tolerance eps = stdlib${ii}$_dlamch( 'EPSILON' ) tol = max( abs( alpha ), abs( beta ) ) tol = eight*eps*max( abs( d( n ) ), tol ) ! there are 2 kinds of deflation -- first a value in the z-vector ! is small, second two (or more) singular values are very close ! together (their difference is small). ! if the value in the z-vector is small, we simply permute the ! array so that the corresponding singular value is moved to the ! end. ! if two values in the d-vector are close, we perform a two-sided ! rotation designed to make one of the corresponding z-vector ! entries zero, and then permute the array so that the deflated ! singular value is moved to the end. ! if there are multiple singular values then the problem deflates. ! here the number of equal singular values are found. as each equal ! singular value is found, an elementary reflector is computed to ! rotate the corresponding singular subspace so that the ! corresponding components of z are zero in this new basis. k = 1_${ik}$ k2 = n + 1_${ik}$ do j = 2, n if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j coltyp( j ) = 4_${ik}$ if( j==n )go to 120 else jprev = j go to 90 end if end do 90 continue j = jprev 100 continue j = j + 1_${ik}$ if( j>n )go to 110 if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j coltyp( j ) = 4_${ik}$ else ! check if singular values are close enough to allow deflation. if( abs( d( j )-d( jprev ) )<=tol ) then ! deflation is possible. s = z( jprev ) c = z( j ) ! find sqrt(a**2+b**2) without overflow or ! destructive underflow. tau = stdlib${ii}$_dlapy2( c, s ) c = c / tau s = -s / tau z( j ) = tau z( jprev ) = zero ! apply back the givens rotation to the left and right ! singular vector matrices. idxjp = idxq( idx( jprev )+1_${ik}$ ) idxj = idxq( idx( j )+1_${ik}$ ) if( idxjp<=nlp1 ) then idxjp = idxjp - 1_${ik}$ end if if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if call stdlib${ii}$_drot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s ) call stdlib${ii}$_drot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s ) if( coltyp( j )/=coltyp( jprev ) ) then coltyp( j ) = 3_${ik}$ end if coltyp( jprev ) = 4_${ik}$ k2 = k2 - 1_${ik}$ idxp( k2 ) = jprev jprev = j else k = k + 1_${ik}$ u2( k, 1_${ik}$ ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev jprev = j end if end if go to 100 110 continue ! record the last singular value. k = k + 1_${ik}$ u2( k, 1_${ik}$ ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev 120 continue ! count up the total number of the various types of columns, then ! form a permutation which positions the four column types into ! four groups of uniform structure (although one or more of these ! groups may be empty). do j = 1, 4 ctot( j ) = 0_${ik}$ end do do j = 2, n ct = coltyp( j ) ctot( ct ) = ctot( ct ) + 1_${ik}$ end do ! psm(*) = position in submatrix (of types 1 through 4) psm( 1_${ik}$ ) = 2_${ik}$ psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ ) psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ ) psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ ) ! fill out the idxc array so that the permutation which it induces ! will place all type-1 columns first, all type-2 columns next, ! then all type-3's, and finally all type-4's, starting from the ! second column. this applies similarly to the rows of vt. do j = 2, n jp = idxp( j ) ct = coltyp( jp ) idxc( psm( ct ) ) = j psm( ct ) = psm( ct ) + 1_${ik}$ end do ! sort the singular values and corresponding singular vectors into ! dsigma, u2, and vt2 respectively. the singular values/vectors ! which were not deflated go into the first k slots of dsigma, u2, ! and vt2 respectively, while those which were deflated go into the ! last n - k slots, except that the first column/row will be treated ! separately. do j = 2, n jp = idxp( j ) dsigma( j ) = d( jp ) idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ ) if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if call stdlib${ii}$_dcopy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ ) call stdlib${ii}$_dcopy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 ) end do ! determine dsigma(1), dsigma(2) and z(1) dsigma( 1_${ik}$ ) = zero hlftol = tol / two if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol if( m>n ) then z( 1_${ik}$ ) = stdlib${ii}$_dlapy2( z1, z( m ) ) if( z( 1_${ik}$ )<=tol ) then c = one s = zero z( 1_${ik}$ ) = tol else c = z1 / z( 1_${ik}$ ) s = z( m ) / z( 1_${ik}$ ) end if else if( abs( z1 )<=tol ) then z( 1_${ik}$ ) = tol else z( 1_${ik}$ ) = z1 end if end if ! move the rest of the updating row to z. call stdlib${ii}$_dcopy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ ) ! determine the first column of u2, the first row of vt2 and the ! last row of vt. call stdlib${ii}$_dlaset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 ) u2( nlp1, 1_${ik}$ ) = one if( m>n ) then do i = 1, nlp1 vt( m, i ) = -s*vt( nlp1, i ) vt2( 1_${ik}$, i ) = c*vt( nlp1, i ) end do do i = nlp2, m vt2( 1_${ik}$, i ) = s*vt( m, i ) vt( m, i ) = c*vt( m, i ) end do else call stdlib${ii}$_dcopy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 ) end if if( m>n ) then call stdlib${ii}$_dcopy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 ) end if ! the deflated singular values and their corresponding vectors go ! into the back of d, u, and v respectively. if( n>k ) then call stdlib${ii}$_dcopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ ) call stdlib${ii}$_dlacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu ) call stdlib${ii}$_dlacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt ) end if ! copy ctot into coltyp for referencing in stdlib${ii}$_dlasd3. do j = 1, 4 coltyp( j ) = ctot( j ) end do return end subroutine stdlib${ii}$_dlasd2 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, & !! DLASD2: merges the two sets of singular values together into a single !! sorted set. Then it tries to deflate the size of the problem. !! There are two ways in which deflation can occur: when two or more !! singular values are close together or if there is a tiny entry in the !! Z vector. For each such occurrence the order of the related secular !! equation problem is reduced by one. !! DLASD2 is called from DLASD1. u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info, k integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre real(${rk}$), intent(in) :: alpha, beta ! Array Arguments integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*) integer(${ik}$), intent(inout) :: idxq(*) real(${rk}$), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*) real(${rk}$), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*) ! ===================================================================== ! Local Arrays integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$) ! Local Scalars integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 real(${rk}$) :: c, eps, hlftol, s, tau, tol, z1 ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then info = -3_${ik}$ end if n = nl + nr + 1_${ik}$ m = n + sqre if( ldu<n ) then info = -10_${ik}$ else if( ldvt<m ) then info = -12_${ik}$ else if( ldu2<n ) then info = -15_${ik}$ else if( ldvt2<m ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD2', -info ) return end if nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ ! generate the first part of the vector z; and move the singular ! values in the first part of d one position backward. z1 = alpha*vt( nlp1, nlp1 ) z( 1_${ik}$ ) = z1 do i = nl, 1, -1 z( i+1 ) = alpha*vt( i, nlp1 ) d( i+1 ) = d( i ) idxq( i+1 ) = idxq( i ) + 1_${ik}$ end do ! generate the second part of the vector z. do i = nlp2, m z( i ) = beta*vt( i, nlp2 ) end do ! initialize some reference arrays. do i = 2, nlp1 coltyp( i ) = 1_${ik}$ end do do i = nlp2, n coltyp( i ) = 2_${ik}$ end do ! sort the singular values into increasing order do i = nlp2, n idxq( i ) = idxq( i ) + nlp1 end do ! dsigma, idxc, idxc, and the first column of u2 ! are used as storage space. do i = 2, n dsigma( i ) = d( idxq( i ) ) u2( i, 1_${ik}$ ) = z( idxq( i ) ) idxc( i ) = coltyp( idxq( i ) ) end do call stdlib${ii}$_${ri}$lamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) ) do i = 2, n idxi = 1_${ik}$ + idx( i ) d( i ) = dsigma( idxi ) z( i ) = u2( idxi, 1_${ik}$ ) coltyp( i ) = idxc( idxi ) end do ! calculate the allowable deflation tolerance eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' ) tol = max( abs( alpha ), abs( beta ) ) tol = eight*eps*max( abs( d( n ) ), tol ) ! there are 2 kinds of deflation -- first a value in the z-vector ! is small, second two (or more) singular values are very close ! together (their difference is small). ! if the value in the z-vector is small, we simply permute the ! array so that the corresponding singular value is moved to the ! end. ! if two values in the d-vector are close, we perform a two-sided ! rotation designed to make one of the corresponding z-vector ! entries zero, and then permute the array so that the deflated ! singular value is moved to the end. ! if there are multiple singular values then the problem deflates. ! here the number of equal singular values are found. as each equal ! singular value is found, an elementary reflector is computed to ! rotate the corresponding singular subspace so that the ! corresponding components of z are zero in this new basis. k = 1_${ik}$ k2 = n + 1_${ik}$ do j = 2, n if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j coltyp( j ) = 4_${ik}$ if( j==n )go to 120 else jprev = j go to 90 end if end do 90 continue j = jprev 100 continue j = j + 1_${ik}$ if( j>n )go to 110 if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j coltyp( j ) = 4_${ik}$ else ! check if singular values are close enough to allow deflation. if( abs( d( j )-d( jprev ) )<=tol ) then ! deflation is possible. s = z( jprev ) c = z( j ) ! find sqrt(a**2+b**2) without overflow or ! destructive underflow. tau = stdlib${ii}$_${ri}$lapy2( c, s ) c = c / tau s = -s / tau z( j ) = tau z( jprev ) = zero ! apply back the givens rotation to the left and right ! singular vector matrices. idxjp = idxq( idx( jprev )+1_${ik}$ ) idxj = idxq( idx( j )+1_${ik}$ ) if( idxjp<=nlp1 ) then idxjp = idxjp - 1_${ik}$ end if if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if call stdlib${ii}$_${ri}$rot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s ) call stdlib${ii}$_${ri}$rot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s ) if( coltyp( j )/=coltyp( jprev ) ) then coltyp( j ) = 3_${ik}$ end if coltyp( jprev ) = 4_${ik}$ k2 = k2 - 1_${ik}$ idxp( k2 ) = jprev jprev = j else k = k + 1_${ik}$ u2( k, 1_${ik}$ ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev jprev = j end if end if go to 100 110 continue ! record the last singular value. k = k + 1_${ik}$ u2( k, 1_${ik}$ ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev 120 continue ! count up the total number of the various types of columns, then ! form a permutation which positions the four column types into ! four groups of uniform structure (although one or more of these ! groups may be empty). do j = 1, 4 ctot( j ) = 0_${ik}$ end do do j = 2, n ct = coltyp( j ) ctot( ct ) = ctot( ct ) + 1_${ik}$ end do ! psm(*) = position in submatrix (of types 1 through 4) psm( 1_${ik}$ ) = 2_${ik}$ psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ ) psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ ) psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ ) ! fill out the idxc array so that the permutation which it induces ! will place all type-1 columns first, all type-2 columns next, ! then all type-3's, and finally all type-4's, starting from the ! second column. this applies similarly to the rows of vt. do j = 2, n jp = idxp( j ) ct = coltyp( jp ) idxc( psm( ct ) ) = j psm( ct ) = psm( ct ) + 1_${ik}$ end do ! sort the singular values and corresponding singular vectors into ! dsigma, u2, and vt2 respectively. the singular values/vectors ! which were not deflated go into the first k slots of dsigma, u2, ! and vt2 respectively, while those which were deflated go into the ! last n - k slots, except that the first column/row will be treated ! separately. do j = 2, n jp = idxp( j ) dsigma( j ) = d( jp ) idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ ) if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if call stdlib${ii}$_${ri}$copy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 ) end do ! determine dsigma(1), dsigma(2) and z(1) dsigma( 1_${ik}$ ) = zero hlftol = tol / two if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol if( m>n ) then z( 1_${ik}$ ) = stdlib${ii}$_${ri}$lapy2( z1, z( m ) ) if( z( 1_${ik}$ )<=tol ) then c = one s = zero z( 1_${ik}$ ) = tol else c = z1 / z( 1_${ik}$ ) s = z( m ) / z( 1_${ik}$ ) end if else if( abs( z1 )<=tol ) then z( 1_${ik}$ ) = tol else z( 1_${ik}$ ) = z1 end if end if ! move the rest of the updating row to z. call stdlib${ii}$_${ri}$copy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ ) ! determine the first column of u2, the first row of vt2 and the ! last row of vt. call stdlib${ii}$_${ri}$laset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 ) u2( nlp1, 1_${ik}$ ) = one if( m>n ) then do i = 1, nlp1 vt( m, i ) = -s*vt( nlp1, i ) vt2( 1_${ik}$, i ) = c*vt( nlp1, i ) end do do i = nlp2, m vt2( 1_${ik}$, i ) = s*vt( m, i ) vt( m, i ) = c*vt( m, i ) end do else call stdlib${ii}$_${ri}$copy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 ) end if if( m>n ) then call stdlib${ii}$_${ri}$copy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 ) end if ! the deflated singular values and their corresponding vectors go ! into the back of d, u, and v respectively. if( n>k ) then call stdlib${ii}$_${ri}$copy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu ) call stdlib${ii}$_${ri}$lacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt ) end if ! copy ctot into coltyp for referencing in stdlib${ii}$_${ri}$lasd3. do j = 1, 4 coltyp( j ) = ctot( j ) end do return end subroutine stdlib${ii}$_${ri}$lasd2 #:endif #:endfor pure module subroutine stdlib${ii}$_slasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,& !! SLASD3 finds all the square roots of the roots of the secular !! equation, as defined by the values in D and Z. It makes the !! appropriate calls to SLASD4 and then updates the singular !! vectors by matrix multiplication. !! This code makes very mild assumptions about floating point !! arithmetic. It will work on machines with a guard digit in !! add/subtract, or on those binary machines without guard digits !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. !! It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. !! SLASD3 is called from SLASD1. vt2, ldvt2, idxc, ctot, z,info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre ! Array Arguments integer(${ik}$), intent(in) :: ctot(*), idxc(*) real(sp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*) real(sp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*) real(sp), intent(in) :: u2(ldu2,*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1 real(sp) :: rho, temp ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then info = -3_${ik}$ end if n = nl + nr + 1_${ik}$ m = n + sqre nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ if( ( k<1_${ik}$ ) .or. ( k>n ) ) then info = -4_${ik}$ else if( ldq<k ) then info = -7_${ik}$ else if( ldu<n ) then info = -10_${ik}$ else if( ldu2<n ) then info = -12_${ik}$ else if( ldvt<m ) then info = -14_${ik}$ else if( ldvt2<m ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD3', -info ) return end if ! quick return if possible if( k==1_${ik}$ ) then d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) ) call stdlib${ii}$_scopy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt ) if( z( 1_${ik}$ )>zero ) then call stdlib${ii}$_scopy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) else do i = 1, n u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ ) end do end if return end if ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can ! be computed with high relative accuracy (barring over/underflow). ! this is a problem on machines without a guard digit in ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), ! which on any of these machines zeros out the bottommost ! bit of dsigma(i) if it is 1; this makes the subsequent ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation ! occurs. on binary machines with a guard digit (almost all ! machines) it does not change dsigma(i) at all. on hexadecimal ! and decimal machines with a guard digit, it slightly ! changes the bottommost bits of dsigma(i). it does not account ! for hexadecimal or decimal machines without guard digits ! (we know of none). we use a subroutine call to compute ! 2*dsigma(i) to prevent optimizing compilers from eliminating ! this code. do i = 1, k dsigma( i ) = stdlib${ii}$_slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) end do ! keep a copy of z. call stdlib${ii}$_scopy( k, z, 1_${ik}$, q, 1_${ik}$ ) ! normalize z. rho = stdlib${ii}$_snrm2( k, z, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info ) rho = rho*rho ! find the new singular values. do j = 1, k call stdlib${ii}$_slasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info ) ! if the zero finder fails, report the convergence failure. if( info/=0_${ik}$ ) then return end if end do ! compute updated z. do i = 1, k z( i ) = u( i, k )*vt( i, k ) do j = 1, i - 1 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i & )+dsigma( j ) ) ) end do do j = i, k - 1 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( & i )+dsigma( j+1 ) ) ) end do z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) ) end do ! compute left singular vectors of the modified diagonal matrix, ! and store related information for the right singular vectors. do i = 1, k vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i ) u( 1_${ik}$, i ) = negone do j = 2, k vt( j, i ) = z( j ) / u( j, i ) / vt( j, i ) u( j, i ) = dsigma( j )*vt( j, i ) end do temp = stdlib${ii}$_snrm2( k, u( 1_${ik}$, i ), 1_${ik}$ ) q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp do j = 2, k jc = idxc( j ) q( j, i ) = u( jc, i ) / temp end do end do ! update the left singular vector matrix. if( k==2_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu ) go to 100 end if if( ctot( 1_${ik}$ )>0_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,& zero, u( 1_${ik}$, 1_${ik}$ ), ldu ) if( ctot( 3_${ik}$ )>0_${ik}$ ) then ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( & ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu ) end if else if( ctot( 3_${ik}$ )>0_${ik}$ ) then ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, & 1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu ) else call stdlib${ii}$_slacpy( 'F', nl, k, u2, ldu2, u, ldu ) end if call stdlib${ii}$_scopy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu ) ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ ) call stdlib${ii}$_sgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), & ldq, zero, u( nlp2, 1_${ik}$ ), ldu ) ! generate the right singular vectors. 100 continue do i = 1, k temp = stdlib${ii}$_snrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ ) q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp do j = 2, k jc = idxc( j ) q( i, j ) = vt( jc, i ) / temp end do end do ! update the right singular vector matrix. if( k==2_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt ) return end if ktemp = 1_${ik}$ + ctot( 1_${ik}$ ) call stdlib${ii}$_sgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, & zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt ) ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) if( ktemp<=ldvt2 )call stdlib${ii}$_sgemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),& ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt ) ktemp = ctot( 1_${ik}$ ) + 1_${ik}$ nrp1 = nr + sqre if( ktemp>1_${ik}$ ) then do i = 1, k q( i, ktemp ) = q( i, 1_${ik}$ ) end do do i = nlp2, m vt2( ktemp, i ) = vt2( 1_${ik}$, i ) end do end if ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ ) call stdlib${ii}$_sgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )& , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt ) return end subroutine stdlib${ii}$_slasd3 pure module subroutine stdlib${ii}$_dlasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,& !! DLASD3 finds all the square roots of the roots of the secular !! equation, as defined by the values in D and Z. It makes the !! appropriate calls to DLASD4 and then updates the singular !! vectors by matrix multiplication. !! This code makes very mild assumptions about floating point !! arithmetic. It will work on machines with a guard digit in !! add/subtract, or on those binary machines without guard digits !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. !! It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. !! DLASD3 is called from DLASD1. vt2, ldvt2, idxc, ctot, z,info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre ! Array Arguments integer(${ik}$), intent(in) :: ctot(*), idxc(*) real(dp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*) real(dp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*) real(dp), intent(in) :: u2(ldu2,*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1 real(dp) :: rho, temp ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then info = -3_${ik}$ end if n = nl + nr + 1_${ik}$ m = n + sqre nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ if( ( k<1_${ik}$ ) .or. ( k>n ) ) then info = -4_${ik}$ else if( ldq<k ) then info = -7_${ik}$ else if( ldu<n ) then info = -10_${ik}$ else if( ldu2<n ) then info = -12_${ik}$ else if( ldvt<m ) then info = -14_${ik}$ else if( ldvt2<m ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD3', -info ) return end if ! quick return if possible if( k==1_${ik}$ ) then d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) ) call stdlib${ii}$_dcopy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt ) if( z( 1_${ik}$ )>zero ) then call stdlib${ii}$_dcopy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) else do i = 1, n u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ ) end do end if return end if ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can ! be computed with high relative accuracy (barring over/underflow). ! this is a problem on machines without a guard digit in ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), ! which on any of these machines zeros out the bottommost ! bit of dsigma(i) if it is 1; this makes the subsequent ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation ! occurs. on binary machines with a guard digit (almost all ! machines) it does not change dsigma(i) at all. on hexadecimal ! and decimal machines with a guard digit, it slightly ! changes the bottommost bits of dsigma(i). it does not account ! for hexadecimal or decimal machines without guard digits ! (we know of none). we use a subroutine call to compute ! 2*dsigma(i) to prevent optimizing compilers from eliminating ! this code. do i = 1, k dsigma( i ) = stdlib${ii}$_dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) end do ! keep a copy of z. call stdlib${ii}$_dcopy( k, z, 1_${ik}$, q, 1_${ik}$ ) ! normalize z. rho = stdlib${ii}$_dnrm2( k, z, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info ) rho = rho*rho ! find the new singular values. do j = 1, k call stdlib${ii}$_dlasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info ) ! if the zero finder fails, report the convergence failure. if( info/=0_${ik}$ ) then return end if end do ! compute updated z. do i = 1, k z( i ) = u( i, k )*vt( i, k ) do j = 1, i - 1 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i & )+dsigma( j ) ) ) end do do j = i, k - 1 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( & i )+dsigma( j+1 ) ) ) end do z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) ) end do ! compute left singular vectors of the modified diagonal matrix, ! and store related information for the right singular vectors. do i = 1, k vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i ) u( 1_${ik}$, i ) = negone do j = 2, k vt( j, i ) = z( j ) / u( j, i ) / vt( j, i ) u( j, i ) = dsigma( j )*vt( j, i ) end do temp = stdlib${ii}$_dnrm2( k, u( 1_${ik}$, i ), 1_${ik}$ ) q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp do j = 2, k jc = idxc( j ) q( j, i ) = u( jc, i ) / temp end do end do ! update the left singular vector matrix. if( k==2_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu ) go to 100 end if if( ctot( 1_${ik}$ )>0_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,& zero, u( 1_${ik}$, 1_${ik}$ ), ldu ) if( ctot( 3_${ik}$ )>0_${ik}$ ) then ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( & ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu ) end if else if( ctot( 3_${ik}$ )>0_${ik}$ ) then ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, & 1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu ) else call stdlib${ii}$_dlacpy( 'F', nl, k, u2, ldu2, u, ldu ) end if call stdlib${ii}$_dcopy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu ) ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ ) call stdlib${ii}$_dgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), & ldq, zero, u( nlp2, 1_${ik}$ ), ldu ) ! generate the right singular vectors. 100 continue do i = 1, k temp = stdlib${ii}$_dnrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ ) q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp do j = 2, k jc = idxc( j ) q( i, j ) = vt( jc, i ) / temp end do end do ! update the right singular vector matrix. if( k==2_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt ) return end if ktemp = 1_${ik}$ + ctot( 1_${ik}$ ) call stdlib${ii}$_dgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, & zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt ) ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) if( ktemp<=ldvt2 )call stdlib${ii}$_dgemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),& ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt ) ktemp = ctot( 1_${ik}$ ) + 1_${ik}$ nrp1 = nr + sqre if( ktemp>1_${ik}$ ) then do i = 1, k q( i, ktemp ) = q( i, 1_${ik}$ ) end do do i = nlp2, m vt2( ktemp, i ) = vt2( 1_${ik}$, i ) end do end if ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ ) call stdlib${ii}$_dgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )& , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt ) return end subroutine stdlib${ii}$_dlasd3 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,& !! DLASD3: finds all the square roots of the roots of the secular !! equation, as defined by the values in D and Z. It makes the !! appropriate calls to DLASD4 and then updates the singular !! vectors by matrix multiplication. !! This code makes very mild assumptions about floating point !! arithmetic. It will work on machines with a guard digit in !! add/subtract, or on those binary machines without guard digits !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. !! It could conceivably fail on hexadecimal or decimal machines !! without guard digits, but we know of none. !! DLASD3 is called from DLASD1. vt2, ldvt2, idxc, ctot, z,info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre ! Array Arguments integer(${ik}$), intent(in) :: ctot(*), idxc(*) real(${rk}$), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*) real(${rk}$), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*) real(${rk}$), intent(in) :: u2(ldu2,*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1 real(${rk}$) :: rho, temp ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( nl<1_${ik}$ ) then info = -1_${ik}$ else if( nr<1_${ik}$ ) then info = -2_${ik}$ else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then info = -3_${ik}$ end if n = nl + nr + 1_${ik}$ m = n + sqre nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ if( ( k<1_${ik}$ ) .or. ( k>n ) ) then info = -4_${ik}$ else if( ldq<k ) then info = -7_${ik}$ else if( ldu<n ) then info = -10_${ik}$ else if( ldu2<n ) then info = -12_${ik}$ else if( ldvt<m ) then info = -14_${ik}$ else if( ldvt2<m ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD3', -info ) return end if ! quick return if possible if( k==1_${ik}$ ) then d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) ) call stdlib${ii}$_${ri}$copy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt ) if( z( 1_${ik}$ )>zero ) then call stdlib${ii}$_${ri}$copy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) else do i = 1, n u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ ) end do end if return end if ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can ! be computed with high relative accuracy (barring over/underflow). ! this is a problem on machines without a guard digit in ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), ! which on any of these machines zeros out the bottommost ! bit of dsigma(i) if it is 1; this makes the subsequent ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation ! occurs. on binary machines with a guard digit (almost all ! machines) it does not change dsigma(i) at all. on hexadecimal ! and decimal machines with a guard digit, it slightly ! changes the bottommost bits of dsigma(i). it does not account ! for hexadecimal or decimal machines without guard digits ! (we know of none). we use a subroutine call to compute ! 2*dsigma(i) to prevent optimizing compilers from eliminating ! this code. do i = 1, k dsigma( i ) = stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) end do ! keep a copy of z. call stdlib${ii}$_${ri}$copy( k, z, 1_${ik}$, q, 1_${ik}$ ) ! normalize z. rho = stdlib${ii}$_${ri}$nrm2( k, z, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info ) rho = rho*rho ! find the new singular values. do j = 1, k call stdlib${ii}$_${ri}$lasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info ) ! if the zero finder fails, report the convergence failure. if( info/=0_${ik}$ ) then return end if end do ! compute updated z. do i = 1, k z( i ) = u( i, k )*vt( i, k ) do j = 1, i - 1 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i & )+dsigma( j ) ) ) end do do j = i, k - 1 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( & i )+dsigma( j+1 ) ) ) end do z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) ) end do ! compute left singular vectors of the modified diagonal matrix, ! and store related information for the right singular vectors. do i = 1, k vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i ) u( 1_${ik}$, i ) = negone do j = 2, k vt( j, i ) = z( j ) / u( j, i ) / vt( j, i ) u( j, i ) = dsigma( j )*vt( j, i ) end do temp = stdlib${ii}$_${ri}$nrm2( k, u( 1_${ik}$, i ), 1_${ik}$ ) q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp do j = 2, k jc = idxc( j ) q( j, i ) = u( jc, i ) / temp end do end do ! update the left singular vector matrix. if( k==2_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu ) go to 100 end if if( ctot( 1_${ik}$ )>0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,& zero, u( 1_${ik}$, 1_${ik}$ ), ldu ) if( ctot( 3_${ik}$ )>0_${ik}$ ) then ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( & ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu ) end if else if( ctot( 3_${ik}$ )>0_${ik}$ ) then ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, & 1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu ) else call stdlib${ii}$_${ri}$lacpy( 'F', nl, k, u2, ldu2, u, ldu ) end if call stdlib${ii}$_${ri}$copy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu ) ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), & ldq, zero, u( nlp2, 1_${ik}$ ), ldu ) ! generate the right singular vectors. 100 continue do i = 1, k temp = stdlib${ii}$_${ri}$nrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ ) q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp do j = 2, k jc = idxc( j ) q( i, j ) = vt( jc, i ) / temp end do end do ! update the right singular vector matrix. if( k==2_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt ) return end if ktemp = 1_${ik}$ + ctot( 1_${ik}$ ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, & zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt ) ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ ) if( ktemp<=ldvt2 )call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),& ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt ) ktemp = ctot( 1_${ik}$ ) + 1_${ik}$ nrp1 = nr + sqre if( ktemp>1_${ik}$ ) then do i = 1, k q( i, ktemp ) = q( i, 1_${ik}$ ) end do do i = nlp2, m vt2( ktemp, i ) = vt2( 1_${ik}$, i ) end do end if ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )& , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt ) return end subroutine stdlib${ii}$_${ri}$lasd3 #:endif #:endfor pure module subroutine stdlib${ii}$_slasd4( n, i, d, z, delta, rho, sigma, work, info ) !! This subroutine computes the square root of the I-th updated !! eigenvalue of a positive symmetric rank-one modification to !! a positive diagonal matrix whose entries are given as the squares !! of the corresponding entries in the array d, and that !! 0 <= D(i) < D(j) for i < j !! and that RHO > 0. This is arranged by the calling routine, and is !! no loss in generality. The rank-one modified system is thus !! diag( D ) * diag( D ) + RHO * Z * Z_transpose. !! where we assume the Euclidean norm of Z is 1. !! The method consists of approximating the rational functions in the !! secular equation by simpler interpolating rational functions. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: i, n integer(${ik}$), intent(out) :: info real(sp), intent(in) :: rho real(sp), intent(out) :: sigma ! Array Arguments real(sp), intent(in) :: d(*), z(*) real(sp), intent(out) :: delta(*), work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 400_${ik}$ ! Local Scalars logical(lk) :: orgati, swtch, swtch3, geomavg integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter real(sp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, & dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, & temp, temp1, temp2, w ! Local Arrays real(sp) :: dd(3_${ik}$), zz(3_${ik}$) ! Intrinsic Functions ! Executable Statements ! since this routine is called in an inner loop, we do no argument ! checking. ! quick return for n=1 and 2. info = 0_${ik}$ if( n==1_${ik}$ ) then ! presumably, i=1 upon entry sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) ) delta( 1_${ik}$ ) = one work( 1_${ik}$ ) = one return end if if( n==2_${ik}$ ) then call stdlib${ii}$_slasd5( i, d, z, delta, rho, sigma, work ) return end if ! compute machine epsilon eps = stdlib${ii}$_slamch( 'EPSILON' ) rhoinv = one / rho tau2= zero ! the case i = n if( i==n ) then ! initialize some basic variables ii = n - 1_${ik}$ niter = 1_${ik}$ ! calculate initial guess temp = rho / two ! if ||z||_2 is not one, then temp should be set to ! rho * ||z||_2^2 / two temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) ) do j = 1, n work( j ) = d( j ) + d( n ) + temp1 delta( j ) = ( d( j )-d( n ) ) - temp1 end do psi = zero do j = 1, n - 2 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) ) end do c = rhoinv + psi w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )& *work( n ) ) if( w<=zero ) then temp1 = sqrt( d( n )*d( n )+rho ) temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+& temp1 ) ) ) +z( n )*z( n ) / rho ! the following tau2 is to approximate ! sigma_n^2 - d( n )*d( n ) if( c<=temp ) then tau = rho else delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) b = z( n )*z( n )*delsq if( a<zero ) then tau2 = two*b / ( sqrt( a*a+four*b*c )-a ) else tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c ) end if tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) end if ! it can be proved that ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho else delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) b = z( n )*z( n )*delsq ! the following tau2 is to approximate ! sigma_n^2 - d( n )*d( n ) if( a<zero ) then tau2 = two*b / ( sqrt( a*a+four*b*c )-a ) else tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c ) end if tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) ! it can be proved that ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2 end if ! the following tau is to approximate sigma_n - d( n ) ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) sigma = d( n ) + tau do j = 1, n delta( j ) = ( d( j )-d( n ) ) - tau work( j ) = d( j ) + d( n ) + tau end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( delta( j )*work( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi temp = z( n ) / ( delta( n )*work( n ) ) phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if ! calculate the new step niter = niter + 1_${ik}$ dtnsq1 = work( n-1 )*delta( n-1 ) dtnsq = work( n )*delta( n ) c = w - dtnsq1*dpsi - dtnsq*dphi a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi ) b = dtnsq*dtnsq1*w if( c<zero )c = abs( c ) if( c==zero ) then eta = rho - sigma*sigma else if( a>=zero ) then eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>zero )eta = -w / ( dpsi+dphi ) temp = eta - dtnsq if( temp>rho )eta = rho + dtnsq eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) tau = tau + eta sigma = sigma + eta do j = 1, n delta( j ) = delta( j ) - eta work( j ) = work( j ) + eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi tau2 = work( n )*delta( n ) temp = z( n ) / tau2 phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi ! main loop to update the values of the array delta iter = niter + 1_${ik}$ loop_90: do niter = iter, maxit ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if ! calculate the new step dtnsq1 = work( n-1 )*delta( n-1 ) dtnsq = work( n )*delta( n ) c = w - dtnsq1*dpsi - dtnsq*dphi a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi ) b = dtnsq1*dtnsq*w if( a>=zero ) then eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>zero )eta = -w / ( dpsi+dphi ) temp = eta - dtnsq if( temp<=zero )eta = eta / two eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) tau = tau + eta sigma = sigma + eta do j = 1, n delta( j ) = delta( j ) - eta work( j ) = work( j ) + eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi tau2 = work( n )*delta( n ) temp = z( n ) / tau2 phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi end do loop_90 ! return with info = 1, niter = maxit and not converged info = 1_${ik}$ go to 240 ! end for the case i = n else ! the case for i < n niter = 1_${ik}$ ip1 = i + 1_${ik}$ ! calculate initial guess delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) ) delsq2 = delsq / two sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two ) temp = delsq2 / ( d( i )+sq2 ) do j = 1, n work( j ) = d( j ) + d( i ) + temp delta( j ) = ( d( j )-d( i ) ) - temp end do psi = zero do j = 1, i - 1 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) ) end do phi = zero do j = n, i + 2, -1 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) ) end do c = rhoinv + psi + phi w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )& *delta( ip1 ) ) geomavg = .false. if( w>zero ) then ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 ! we choose d(i) as origin. orgati = .true. ii = i sglb = zero sgub = delsq2 / ( d( i )+sq2 ) a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 ) b = z( i )*z( i )*delsq if( a>zero ) then tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) else tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) end if ! tau2 now is an estimation of sigma^2 - d( i )^2. the ! following, however, is the corresponding estimation of ! sigma - d( i ). tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) ) temp = sqrt(eps) if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then tau = min( ten*d(i), sgub ) geomavg = .true. end if else ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 ! we choose d(i+1) as origin. orgati = .false. ii = ip1 sglb = -delsq2 / ( d( ii )+sq2 ) sgub = zero a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 ) b = z( ip1 )*z( ip1 )*delsq if( a<zero ) then tau2 = two*b / ( a-sqrt( abs( a*a+four*b*c ) ) ) else tau2 = -( a+sqrt( abs( a*a+four*b*c ) ) ) / ( two*c ) end if ! tau2 now is an estimation of sigma^2 - d( ip1 )^2. the ! following, however, is the corresponding estimation of ! sigma - d( ip1 ). tau = tau2 / ( d( ip1 )+sqrt( abs( d( ip1 )*d( ip1 )+tau2 ) ) ) end if sigma = d( ii ) + tau do j = 1, n work( j ) = d( j ) + d( ii ) + tau delta( j ) = ( d( j )-d( ii ) ) - tau end do iim1 = ii - 1_${ik}$ iip1 = ii + 1_${ik}$ ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do w = rhoinv + phi + psi ! w is the value of the secular function with ! its ii-th element removed. swtch3 = .false. if( orgati ) then if( w<zero )swtch3 = .true. else if( w>zero )swtch3 = .true. end if if( ii==1_${ik}$ .or. ii==n )swtch3 = .false. temp = z( ii ) / ( work( ii )*delta( ii ) ) dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = w + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if if( w<=zero ) then sglb = max( sglb, tau ) else sgub = min( sgub, tau ) end if ! calculate the new step niter = niter + 1_${ik}$ if( .not.swtch3 ) then dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi ) end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if else ! interpolation using three most relevant poles dtiim = work( iim1 )*delta( iim1 ) dtiip = work( iip1 )*delta( iip1 ) temp = rhoinv + psi + phi if( orgati ) then temp1 = z( iim1 ) / dtiim temp1 = temp1*temp1 c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( & iip1 ) )*temp1 zz( 1_${ik}$ ) = z( iim1 )*z( iim1 ) if( dpsi<temp1 ) then zz( 3_${ik}$ ) = dtiip*dtiip*dphi else zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi ) end if else temp1 = z( iip1 ) / dtiip temp1 = temp1*temp1 c = ( temp - dtiim*( dpsi+dphi ) ) -( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( & iip1 ) )*temp1 if( dphi<temp1 ) then zz( 1_${ik}$ ) = dtiim*dtiim*dpsi else zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) ) end if zz( 3_${ik}$ ) = z( iip1 )*z( iip1 ) end if zz( 2_${ik}$ ) = z( ii )*z( ii ) dd( 1_${ik}$ ) = dtiim dd( 2_${ik}$ ) = delta( ii )*work( ii ) dd( 3_${ik}$ ) = dtiip call stdlib${ii}$_slaed6( niter, orgati, c, dd, zz, w, eta, info ) if( info/=0_${ik}$ ) then ! if info is not 0, i.e., stdlib${ii}$_slaed6 failed, switch back ! to 2 pole interpolation. swtch3 = .false. info = 0_${ik}$ dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi) end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if end if end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>=zero )eta = -w / dw eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) temp = tau + eta if( temp>sgub .or. temp<sglb ) then if( w<zero ) then eta = ( sgub-tau ) / two else eta = ( sglb-tau ) / two end if if( geomavg ) then if( w < zero ) then if( tau > zero ) then eta = sqrt(sgub*tau)-tau end if else if( sglb > zero ) then eta = sqrt(sglb*tau)-tau end if end if end if end if prew = w tau = tau + eta sigma = sigma + eta do j = 1, n work( j ) = work( j ) + eta delta( j ) = delta( j ) - eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do tau2 = work( ii )*delta( ii ) temp = z( ii ) / tau2 dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = rhoinv + phi + psi + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw swtch = .false. if( orgati ) then if( -w>abs( prew ) / ten )swtch = .true. else if( w>abs( prew ) / ten )swtch = .true. end if ! main loop to update the values of the array delta and work iter = niter + 1_${ik}$ loop_230: do niter = iter, maxit ! test for convergence if( abs( w )<=eps*erretm ) then ! $ .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then go to 240 end if if( w<=zero ) then sglb = max( sglb, tau ) else sgub = min( sgub, tau ) end if ! calculate the new step if( .not.swtch3 ) then dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( .not.swtch ) then if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if else temp = z( ii ) / ( work( ii )*delta( ii ) ) if( orgati ) then dpsi = dpsi + temp*temp else dphi = dphi + temp*temp end if c = w - dtisq*dpsi - dtipsq*dphi end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( .not.swtch ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) end if else a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if else ! interpolation using three most relevant poles dtiim = work( iim1 )*delta( iim1 ) dtiip = work( iip1 )*delta( iip1 ) temp = rhoinv + psi + phi if( swtch ) then c = temp - dtiim*dpsi - dtiip*dphi zz( 1_${ik}$ ) = dtiim*dtiim*dpsi zz( 3_${ik}$ ) = dtiip*dtiip*dphi else if( orgati ) then temp1 = z( iim1 ) / dtiim temp1 = temp1*temp1 temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1 c = temp - dtiip*( dpsi+dphi ) - temp2 zz( 1_${ik}$ ) = z( iim1 )*z( iim1 ) if( dpsi<temp1 ) then zz( 3_${ik}$ ) = dtiip*dtiip*dphi else zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi ) end if else temp1 = z( iip1 ) / dtiip temp1 = temp1*temp1 temp2 = ( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( iip1 ) )*temp1 c = temp - dtiim*( dpsi+dphi ) - temp2 if( dphi<temp1 ) then zz( 1_${ik}$ ) = dtiim*dtiim*dpsi else zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) ) end if zz( 3_${ik}$ ) = z( iip1 )*z( iip1 ) end if end if dd( 1_${ik}$ ) = dtiim dd( 2_${ik}$ ) = delta( ii )*work( ii ) dd( 3_${ik}$ ) = dtiip call stdlib${ii}$_slaed6( niter, orgati, c, dd, zz, w, eta, info ) if( info/=0_${ik}$ ) then ! if info is not 0, i.e., stdlib${ii}$_slaed6 failed, switch ! back to two pole interpolation swtch3 = .false. info = 0_${ik}$ dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( .not.swtch ) then if( orgati ) then c = w - dtipsq*dw + delsq*( z( i )/dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 )/dtipsq )**2_${ik}$ end if else temp = z( ii ) / ( work( ii )*delta( ii ) ) if( orgati ) then dpsi = dpsi + temp*temp else dphi = dphi + temp*temp end if c = w - dtisq*dpsi - dtipsq*dphi end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( .not.swtch ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) end if else a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if end if end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>=zero )eta = -w / dw eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) temp=tau+eta if( temp>sgub .or. temp<sglb ) then if( w<zero ) then eta = ( sgub-tau ) / two else eta = ( sglb-tau ) / two end if if( geomavg ) then if( w < zero ) then if( tau > zero ) then eta = sqrt(sgub*tau)-tau end if else if( sglb > zero ) then eta = sqrt(sglb*tau)-tau end if end if end if end if prew = w tau = tau + eta sigma = sigma + eta do j = 1, n work( j ) = work( j ) + eta delta( j ) = delta( j ) - eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do tau2 = work( ii )*delta( ii ) temp = z( ii ) / tau2 dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = rhoinv + phi + psi + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch end do loop_230 ! return with info = 1, niter = maxit and not converged info = 1_${ik}$ end if 240 continue return end subroutine stdlib${ii}$_slasd4 pure module subroutine stdlib${ii}$_dlasd4( n, i, d, z, delta, rho, sigma, work, info ) !! This subroutine computes the square root of the I-th updated !! eigenvalue of a positive symmetric rank-one modification to !! a positive diagonal matrix whose entries are given as the squares !! of the corresponding entries in the array d, and that !! 0 <= D(i) < D(j) for i < j !! and that RHO > 0. This is arranged by the calling routine, and is !! no loss in generality. The rank-one modified system is thus !! diag( D ) * diag( D ) + RHO * Z * Z_transpose. !! where we assume the Euclidean norm of Z is 1. !! The method consists of approximating the rational functions in the !! secular equation by simpler interpolating rational functions. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: i, n integer(${ik}$), intent(out) :: info real(dp), intent(in) :: rho real(dp), intent(out) :: sigma ! Array Arguments real(dp), intent(in) :: d(*), z(*) real(dp), intent(out) :: delta(*), work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 400_${ik}$ ! Local Scalars logical(lk) :: orgati, swtch, swtch3, geomavg integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter real(dp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, & dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, & temp, temp1, temp2, w ! Local Arrays real(dp) :: dd(3_${ik}$), zz(3_${ik}$) ! Intrinsic Functions ! Executable Statements ! since this routine is called in an inner loop, we do no argument ! checking. ! quick return for n=1 and 2. info = 0_${ik}$ if( n==1_${ik}$ ) then ! presumably, i=1 upon entry sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) ) delta( 1_${ik}$ ) = one work( 1_${ik}$ ) = one return end if if( n==2_${ik}$ ) then call stdlib${ii}$_dlasd5( i, d, z, delta, rho, sigma, work ) return end if ! compute machine epsilon eps = stdlib${ii}$_dlamch( 'EPSILON' ) rhoinv = one / rho tau2= zero ! the case i = n if( i==n ) then ! initialize some basic variables ii = n - 1_${ik}$ niter = 1_${ik}$ ! calculate initial guess temp = rho / two ! if ||z||_2 is not one, then temp should be set to ! rho * ||z||_2^2 / two temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) ) do j = 1, n work( j ) = d( j ) + d( n ) + temp1 delta( j ) = ( d( j )-d( n ) ) - temp1 end do psi = zero do j = 1, n - 2 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) ) end do c = rhoinv + psi w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )& *work( n ) ) if( w<=zero ) then temp1 = sqrt( d( n )*d( n )+rho ) temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+& temp1 ) ) ) +z( n )*z( n ) / rho ! the following tau2 is to approximate ! sigma_n^2 - d( n )*d( n ) if( c<=temp ) then tau = rho else delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) b = z( n )*z( n )*delsq if( a<zero ) then tau2 = two*b / ( sqrt( a*a+four*b*c )-a ) else tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c ) end if tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) end if ! it can be proved that ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho else delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) b = z( n )*z( n )*delsq ! the following tau2 is to approximate ! sigma_n^2 - d( n )*d( n ) if( a<zero ) then tau2 = two*b / ( sqrt( a*a+four*b*c )-a ) else tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c ) end if tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) ! it can be proved that ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2 end if ! the following tau is to approximate sigma_n - d( n ) ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) sigma = d( n ) + tau do j = 1, n delta( j ) = ( d( j )-d( n ) ) - tau work( j ) = d( j ) + d( n ) + tau end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( delta( j )*work( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi temp = z( n ) / ( delta( n )*work( n ) ) phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if ! calculate the new step niter = niter + 1_${ik}$ dtnsq1 = work( n-1 )*delta( n-1 ) dtnsq = work( n )*delta( n ) c = w - dtnsq1*dpsi - dtnsq*dphi a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi ) b = dtnsq*dtnsq1*w if( c<zero )c = abs( c ) if( c==zero ) then eta = rho - sigma*sigma else if( a>=zero ) then eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>zero )eta = -w / ( dpsi+dphi ) temp = eta - dtnsq if( temp>rho )eta = rho + dtnsq eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) tau = tau + eta sigma = sigma + eta do j = 1, n delta( j ) = delta( j ) - eta work( j ) = work( j ) + eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi tau2 = work( n )*delta( n ) temp = z( n ) / tau2 phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi ! main loop to update the values of the array delta iter = niter + 1_${ik}$ loop_90: do niter = iter, maxit ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if ! calculate the new step dtnsq1 = work( n-1 )*delta( n-1 ) dtnsq = work( n )*delta( n ) c = w - dtnsq1*dpsi - dtnsq*dphi a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi ) b = dtnsq1*dtnsq*w if( a>=zero ) then eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>zero )eta = -w / ( dpsi+dphi ) temp = eta - dtnsq if( temp<=zero )eta = eta / two eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) tau = tau + eta sigma = sigma + eta do j = 1, n delta( j ) = delta( j ) - eta work( j ) = work( j ) + eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi tau2 = work( n )*delta( n ) temp = z( n ) / tau2 phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi end do loop_90 ! return with info = 1, niter = maxit and not converged info = 1_${ik}$ go to 240 ! end for the case i = n else ! the case for i < n niter = 1_${ik}$ ip1 = i + 1_${ik}$ ! calculate initial guess delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) ) delsq2 = delsq / two sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two ) temp = delsq2 / ( d( i )+sq2 ) do j = 1, n work( j ) = d( j ) + d( i ) + temp delta( j ) = ( d( j )-d( i ) ) - temp end do psi = zero do j = 1, i - 1 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) ) end do phi = zero do j = n, i + 2, -1 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) ) end do c = rhoinv + psi + phi w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )& *delta( ip1 ) ) geomavg = .false. if( w>zero ) then ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 ! we choose d(i) as origin. orgati = .true. ii = i sglb = zero sgub = delsq2 / ( d( i )+sq2 ) a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 ) b = z( i )*z( i )*delsq if( a>zero ) then tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) else tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) end if ! tau2 now is an estimation of sigma^2 - d( i )^2. the ! following, however, is the corresponding estimation of ! sigma - d( i ). tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) ) temp = sqrt(eps) if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then tau = min( ten*d(i), sgub ) geomavg = .true. end if else ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 ! we choose d(i+1) as origin. orgati = .false. ii = ip1 sglb = -delsq2 / ( d( ii )+sq2 ) sgub = zero a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 ) b = z( ip1 )*z( ip1 )*delsq if( a<zero ) then tau2 = two*b / ( a-sqrt( abs( a*a+four*b*c ) ) ) else tau2 = -( a+sqrt( abs( a*a+four*b*c ) ) ) / ( two*c ) end if ! tau2 now is an estimation of sigma^2 - d( ip1 )^2. the ! following, however, is the corresponding estimation of ! sigma - d( ip1 ). tau = tau2 / ( d( ip1 )+sqrt( abs( d( ip1 )*d( ip1 )+tau2 ) ) ) end if sigma = d( ii ) + tau do j = 1, n work( j ) = d( j ) + d( ii ) + tau delta( j ) = ( d( j )-d( ii ) ) - tau end do iim1 = ii - 1_${ik}$ iip1 = ii + 1_${ik}$ ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do w = rhoinv + phi + psi ! w is the value of the secular function with ! its ii-th element removed. swtch3 = .false. if( orgati ) then if( w<zero )swtch3 = .true. else if( w>zero )swtch3 = .true. end if if( ii==1_${ik}$ .or. ii==n )swtch3 = .false. temp = z( ii ) / ( work( ii )*delta( ii ) ) dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = w + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if if( w<=zero ) then sglb = max( sglb, tau ) else sgub = min( sgub, tau ) end if ! calculate the new step niter = niter + 1_${ik}$ if( .not.swtch3 ) then dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi ) end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if else ! interpolation using three most relevant poles dtiim = work( iim1 )*delta( iim1 ) dtiip = work( iip1 )*delta( iip1 ) temp = rhoinv + psi + phi if( orgati ) then temp1 = z( iim1 ) / dtiim temp1 = temp1*temp1 c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( & iip1 ) )*temp1 zz( 1_${ik}$ ) = z( iim1 )*z( iim1 ) if( dpsi<temp1 ) then zz( 3_${ik}$ ) = dtiip*dtiip*dphi else zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi ) end if else temp1 = z( iip1 ) / dtiip temp1 = temp1*temp1 c = ( temp - dtiim*( dpsi+dphi ) ) -( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( & iip1 ) )*temp1 if( dphi<temp1 ) then zz( 1_${ik}$ ) = dtiim*dtiim*dpsi else zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) ) end if zz( 3_${ik}$ ) = z( iip1 )*z( iip1 ) end if zz( 2_${ik}$ ) = z( ii )*z( ii ) dd( 1_${ik}$ ) = dtiim dd( 2_${ik}$ ) = delta( ii )*work( ii ) dd( 3_${ik}$ ) = dtiip call stdlib${ii}$_dlaed6( niter, orgati, c, dd, zz, w, eta, info ) if( info/=0_${ik}$ ) then ! if info is not 0, i.e., stdlib${ii}$_dlaed6 failed, switch back ! to 2 pole interpolation. swtch3 = .false. info = 0_${ik}$ dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi) end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if end if end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>=zero )eta = -w / dw eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) temp = tau + eta if( temp>sgub .or. temp<sglb ) then if( w<zero ) then eta = ( sgub-tau ) / two else eta = ( sglb-tau ) / two end if if( geomavg ) then if( w < zero ) then if( tau > zero ) then eta = sqrt(sgub*tau)-tau end if else if( sglb > zero ) then eta = sqrt(sglb*tau)-tau end if end if end if end if prew = w tau = tau + eta sigma = sigma + eta do j = 1, n work( j ) = work( j ) + eta delta( j ) = delta( j ) - eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do tau2 = work( ii )*delta( ii ) temp = z( ii ) / tau2 dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = rhoinv + phi + psi + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw swtch = .false. if( orgati ) then if( -w>abs( prew ) / ten )swtch = .true. else if( w>abs( prew ) / ten )swtch = .true. end if ! main loop to update the values of the array delta and work iter = niter + 1_${ik}$ loop_230: do niter = iter, maxit ! test for convergence if( abs( w )<=eps*erretm ) then ! $ .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then go to 240 end if if( w<=zero ) then sglb = max( sglb, tau ) else sgub = min( sgub, tau ) end if ! calculate the new step if( .not.swtch3 ) then dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( .not.swtch ) then if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if else temp = z( ii ) / ( work( ii )*delta( ii ) ) if( orgati ) then dpsi = dpsi + temp*temp else dphi = dphi + temp*temp end if c = w - dtisq*dpsi - dtipsq*dphi end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( .not.swtch ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) end if else a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if else ! interpolation using three most relevant poles dtiim = work( iim1 )*delta( iim1 ) dtiip = work( iip1 )*delta( iip1 ) temp = rhoinv + psi + phi if( swtch ) then c = temp - dtiim*dpsi - dtiip*dphi zz( 1_${ik}$ ) = dtiim*dtiim*dpsi zz( 3_${ik}$ ) = dtiip*dtiip*dphi else if( orgati ) then temp1 = z( iim1 ) / dtiim temp1 = temp1*temp1 temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1 c = temp - dtiip*( dpsi+dphi ) - temp2 zz( 1_${ik}$ ) = z( iim1 )*z( iim1 ) if( dpsi<temp1 ) then zz( 3_${ik}$ ) = dtiip*dtiip*dphi else zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi ) end if else temp1 = z( iip1 ) / dtiip temp1 = temp1*temp1 temp2 = ( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( iip1 ) )*temp1 c = temp - dtiim*( dpsi+dphi ) - temp2 if( dphi<temp1 ) then zz( 1_${ik}$ ) = dtiim*dtiim*dpsi else zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) ) end if zz( 3_${ik}$ ) = z( iip1 )*z( iip1 ) end if end if dd( 1_${ik}$ ) = dtiim dd( 2_${ik}$ ) = delta( ii )*work( ii ) dd( 3_${ik}$ ) = dtiip call stdlib${ii}$_dlaed6( niter, orgati, c, dd, zz, w, eta, info ) if( info/=0_${ik}$ ) then ! if info is not 0, i.e., stdlib${ii}$_dlaed6 failed, switch ! back to two pole interpolation swtch3 = .false. info = 0_${ik}$ dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( .not.swtch ) then if( orgati ) then c = w - dtipsq*dw + delsq*( z( i )/dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 )/dtipsq )**2_${ik}$ end if else temp = z( ii ) / ( work( ii )*delta( ii ) ) if( orgati ) then dpsi = dpsi + temp*temp else dphi = dphi + temp*temp end if c = w - dtisq*dpsi - dtipsq*dphi end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( .not.swtch ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) end if else a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if end if end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>=zero )eta = -w / dw eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) temp=tau+eta if( temp>sgub .or. temp<sglb ) then if( w<zero ) then eta = ( sgub-tau ) / two else eta = ( sglb-tau ) / two end if if( geomavg ) then if( w < zero ) then if( tau > zero ) then eta = sqrt(sgub*tau)-tau end if else if( sglb > zero ) then eta = sqrt(sglb*tau)-tau end if end if end if end if prew = w tau = tau + eta sigma = sigma + eta do j = 1, n work( j ) = work( j ) + eta delta( j ) = delta( j ) - eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do tau2 = work( ii )*delta( ii ) temp = z( ii ) / tau2 dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = rhoinv + phi + psi + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch end do loop_230 ! return with info = 1, niter = maxit and not converged info = 1_${ik}$ end if 240 continue return end subroutine stdlib${ii}$_dlasd4 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd4( n, i, d, z, delta, rho, sigma, work, info ) !! This subroutine computes the square root of the I-th updated !! eigenvalue of a positive symmetric rank-one modification to !! a positive diagonal matrix whose entries are given as the squares !! of the corresponding entries in the array d, and that !! 0 <= D(i) < D(j) for i < j !! and that RHO > 0. This is arranged by the calling routine, and is !! no loss in generality. The rank-one modified system is thus !! diag( D ) * diag( D ) + RHO * Z * Z_transpose. !! where we assume the Euclidean norm of Z is 1. !! The method consists of approximating the rational functions in the !! secular equation by simpler interpolating rational functions. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: i, n integer(${ik}$), intent(out) :: info real(${rk}$), intent(in) :: rho real(${rk}$), intent(out) :: sigma ! Array Arguments real(${rk}$), intent(in) :: d(*), z(*) real(${rk}$), intent(out) :: delta(*), work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 400_${ik}$ ! Local Scalars logical(lk) :: orgati, swtch, swtch3, geomavg integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter real(${rk}$) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, & dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, & temp, temp1, temp2, w ! Local Arrays real(${rk}$) :: dd(3_${ik}$), zz(3_${ik}$) ! Intrinsic Functions ! Executable Statements ! since this routine is called in an inner loop, we do no argument ! checking. ! quick return for n=1 and 2. info = 0_${ik}$ if( n==1_${ik}$ ) then ! presumably, i=1 upon entry sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) ) delta( 1_${ik}$ ) = one work( 1_${ik}$ ) = one return end if if( n==2_${ik}$ ) then call stdlib${ii}$_${ri}$lasd5( i, d, z, delta, rho, sigma, work ) return end if ! compute machine epsilon eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' ) rhoinv = one / rho tau2= zero ! the case i = n if( i==n ) then ! initialize some basic variables ii = n - 1_${ik}$ niter = 1_${ik}$ ! calculate initial guess temp = rho / two ! if ||z||_2 is not one, then temp should be set to ! rho * ||z||_2^2 / two temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) ) do j = 1, n work( j ) = d( j ) + d( n ) + temp1 delta( j ) = ( d( j )-d( n ) ) - temp1 end do psi = zero do j = 1, n - 2 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) ) end do c = rhoinv + psi w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )& *work( n ) ) if( w<=zero ) then temp1 = sqrt( d( n )*d( n )+rho ) temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+& temp1 ) ) ) +z( n )*z( n ) / rho ! the following tau2 is to approximate ! sigma_n^2 - d( n )*d( n ) if( c<=temp ) then tau = rho else delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) b = z( n )*z( n )*delsq if( a<zero ) then tau2 = two*b / ( sqrt( a*a+four*b*c )-a ) else tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c ) end if tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) end if ! it can be proved that ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho else delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) ) a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n ) b = z( n )*z( n )*delsq ! the following tau2 is to approximate ! sigma_n^2 - d( n )*d( n ) if( a<zero ) then tau2 = two*b / ( sqrt( a*a+four*b*c )-a ) else tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c ) end if tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) ! it can be proved that ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2 end if ! the following tau is to approximate sigma_n - d( n ) ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) ) sigma = d( n ) + tau do j = 1, n delta( j ) = ( d( j )-d( n ) ) - tau work( j ) = d( j ) + d( n ) + tau end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( delta( j )*work( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi temp = z( n ) / ( delta( n )*work( n ) ) phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if ! calculate the new step niter = niter + 1_${ik}$ dtnsq1 = work( n-1 )*delta( n-1 ) dtnsq = work( n )*delta( n ) c = w - dtnsq1*dpsi - dtnsq*dphi a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi ) b = dtnsq*dtnsq1*w if( c<zero )c = abs( c ) if( c==zero ) then eta = rho - sigma*sigma else if( a>=zero ) then eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>zero )eta = -w / ( dpsi+dphi ) temp = eta - dtnsq if( temp>rho )eta = rho + dtnsq eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) tau = tau + eta sigma = sigma + eta do j = 1, n delta( j ) = delta( j ) - eta work( j ) = work( j ) + eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi tau2 = work( n )*delta( n ) temp = z( n ) / tau2 phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi ! main loop to update the values of the array delta iter = niter + 1_${ik}$ loop_90: do niter = iter, maxit ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if ! calculate the new step dtnsq1 = work( n-1 )*delta( n-1 ) dtnsq = work( n )*delta( n ) c = w - dtnsq1*dpsi - dtnsq*dphi a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi ) b = dtnsq1*dtnsq*w if( a>=zero ) then eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) ) end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>zero )eta = -w / ( dpsi+dphi ) temp = eta - dtnsq if( temp<=zero )eta = eta / two eta = eta / ( sigma+sqrt( eta+sigma*sigma ) ) tau = tau + eta sigma = sigma + eta do j = 1, n delta( j ) = delta( j ) - eta work( j ) = work( j ) + eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, ii temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi tau2 = work( n )*delta( n ) temp = z( n ) / tau2 phi = z( n )*temp dphi = temp*temp erretm = eight*( -phi-psi ) + erretm - phi + rhoinv ! $ + abs( tau2 )*( dpsi+dphi ) w = rhoinv + phi + psi end do loop_90 ! return with info = 1, niter = maxit and not converged info = 1_${ik}$ go to 240 ! end for the case i = n else ! the case for i < n niter = 1_${ik}$ ip1 = i + 1_${ik}$ ! calculate initial guess delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) ) delsq2 = delsq / two sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two ) temp = delsq2 / ( d( i )+sq2 ) do j = 1, n work( j ) = d( j ) + d( i ) + temp delta( j ) = ( d( j )-d( i ) ) - temp end do psi = zero do j = 1, i - 1 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) ) end do phi = zero do j = n, i + 2, -1 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) ) end do c = rhoinv + psi + phi w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )& *delta( ip1 ) ) geomavg = .false. if( w>zero ) then ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 ! we choose d(i) as origin. orgati = .true. ii = i sglb = zero sgub = delsq2 / ( d( i )+sq2 ) a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 ) b = z( i )*z( i )*delsq if( a>zero ) then tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) else tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) end if ! tau2 now is an estimation of sigma^2 - d( i )^2. the ! following, however, is the corresponding estimation of ! sigma - d( i ). tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) ) temp = sqrt(eps) if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then tau = min( ten*d(i), sgub ) geomavg = .true. end if else ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 ! we choose d(i+1) as origin. orgati = .false. ii = ip1 sglb = -delsq2 / ( d( ii )+sq2 ) sgub = zero a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 ) b = z( ip1 )*z( ip1 )*delsq if( a<zero ) then tau2 = two*b / ( a-sqrt( abs( a*a+four*b*c ) ) ) else tau2 = -( a+sqrt( abs( a*a+four*b*c ) ) ) / ( two*c ) end if ! tau2 now is an estimation of sigma^2 - d( ip1 )^2. the ! following, however, is the corresponding estimation of ! sigma - d( ip1 ). tau = tau2 / ( d( ip1 )+sqrt( abs( d( ip1 )*d( ip1 )+tau2 ) ) ) end if sigma = d( ii ) + tau do j = 1, n work( j ) = d( j ) + d( ii ) + tau delta( j ) = ( d( j )-d( ii ) ) - tau end do iim1 = ii - 1_${ik}$ iip1 = ii + 1_${ik}$ ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do w = rhoinv + phi + psi ! w is the value of the secular function with ! its ii-th element removed. swtch3 = .false. if( orgati ) then if( w<zero )swtch3 = .true. else if( w>zero )swtch3 = .true. end if if( ii==1_${ik}$ .or. ii==n )swtch3 = .false. temp = z( ii ) / ( work( ii )*delta( ii ) ) dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = w + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw ! test for convergence if( abs( w )<=eps*erretm ) then go to 240 end if if( w<=zero ) then sglb = max( sglb, tau ) else sgub = min( sgub, tau ) end if ! calculate the new step niter = niter + 1_${ik}$ if( .not.swtch3 ) then dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi ) end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if else ! interpolation using three most relevant poles dtiim = work( iim1 )*delta( iim1 ) dtiip = work( iip1 )*delta( iip1 ) temp = rhoinv + psi + phi if( orgati ) then temp1 = z( iim1 ) / dtiim temp1 = temp1*temp1 c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( & iip1 ) )*temp1 zz( 1_${ik}$ ) = z( iim1 )*z( iim1 ) if( dpsi<temp1 ) then zz( 3_${ik}$ ) = dtiip*dtiip*dphi else zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi ) end if else temp1 = z( iip1 ) / dtiip temp1 = temp1*temp1 c = ( temp - dtiim*( dpsi+dphi ) ) -( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( & iip1 ) )*temp1 if( dphi<temp1 ) then zz( 1_${ik}$ ) = dtiim*dtiim*dpsi else zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) ) end if zz( 3_${ik}$ ) = z( iip1 )*z( iip1 ) end if zz( 2_${ik}$ ) = z( ii )*z( ii ) dd( 1_${ik}$ ) = dtiim dd( 2_${ik}$ ) = delta( ii )*work( ii ) dd( 3_${ik}$ ) = dtiip call stdlib${ii}$_${ri}$laed6( niter, orgati, c, dd, zz, w, eta, info ) if( info/=0_${ik}$ ) then ! if info is not 0, i.e., stdlib${ii}$_${ri}$laed6 failed, switch back ! to 2 pole interpolation. swtch3 = .false. info = 0_${ik}$ dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi) end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if end if end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>=zero )eta = -w / dw eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) temp = tau + eta if( temp>sgub .or. temp<sglb ) then if( w<zero ) then eta = ( sgub-tau ) / two else eta = ( sglb-tau ) / two end if if( geomavg ) then if( w < zero ) then if( tau > zero ) then eta = sqrt(sgub*tau)-tau end if else if( sglb > zero ) then eta = sqrt(sglb*tau)-tau end if end if end if end if prew = w tau = tau + eta sigma = sigma + eta do j = 1, n work( j ) = work( j ) + eta delta( j ) = delta( j ) - eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do tau2 = work( ii )*delta( ii ) temp = z( ii ) / tau2 dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = rhoinv + phi + psi + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw swtch = .false. if( orgati ) then if( -w>abs( prew ) / ten )swtch = .true. else if( w>abs( prew ) / ten )swtch = .true. end if ! main loop to update the values of the array delta and work iter = niter + 1_${ik}$ loop_230: do niter = iter, maxit ! test for convergence if( abs( w )<=eps*erretm ) then ! $ .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then go to 240 end if if( w<=zero ) then sglb = max( sglb, tau ) else sgub = min( sgub, tau ) end if ! calculate the new step if( .not.swtch3 ) then dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( .not.swtch ) then if( orgati ) then c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$ end if else temp = z( ii ) / ( work( ii )*delta( ii ) ) if( orgati ) then dpsi = dpsi + temp*temp else dphi = dphi + temp*temp end if c = w - dtisq*dpsi - dtipsq*dphi end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( .not.swtch ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) end if else a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if else ! interpolation using three most relevant poles dtiim = work( iim1 )*delta( iim1 ) dtiip = work( iip1 )*delta( iip1 ) temp = rhoinv + psi + phi if( swtch ) then c = temp - dtiim*dpsi - dtiip*dphi zz( 1_${ik}$ ) = dtiim*dtiim*dpsi zz( 3_${ik}$ ) = dtiip*dtiip*dphi else if( orgati ) then temp1 = z( iim1 ) / dtiim temp1 = temp1*temp1 temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1 c = temp - dtiip*( dpsi+dphi ) - temp2 zz( 1_${ik}$ ) = z( iim1 )*z( iim1 ) if( dpsi<temp1 ) then zz( 3_${ik}$ ) = dtiip*dtiip*dphi else zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi ) end if else temp1 = z( iip1 ) / dtiip temp1 = temp1*temp1 temp2 = ( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( iip1 ) )*temp1 c = temp - dtiim*( dpsi+dphi ) - temp2 if( dphi<temp1 ) then zz( 1_${ik}$ ) = dtiim*dtiim*dpsi else zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) ) end if zz( 3_${ik}$ ) = z( iip1 )*z( iip1 ) end if end if dd( 1_${ik}$ ) = dtiim dd( 2_${ik}$ ) = delta( ii )*work( ii ) dd( 3_${ik}$ ) = dtiip call stdlib${ii}$_${ri}$laed6( niter, orgati, c, dd, zz, w, eta, info ) if( info/=0_${ik}$ ) then ! if info is not 0, i.e., stdlib${ii}$_${ri}$laed6 failed, switch ! back to two pole interpolation swtch3 = .false. info = 0_${ik}$ dtipsq = work( ip1 )*delta( ip1 ) dtisq = work( i )*delta( i ) if( .not.swtch ) then if( orgati ) then c = w - dtipsq*dw + delsq*( z( i )/dtisq )**2_${ik}$ else c = w - dtisq*dw - delsq*( z( ip1 )/dtipsq )**2_${ik}$ end if else temp = z( ii ) / ( work( ii )*delta( ii ) ) if( orgati ) then dpsi = dpsi + temp*temp else dphi = dphi + temp*temp end if c = w - dtisq*dpsi - dtipsq*dphi end if a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw b = dtipsq*dtisq*w if( c==zero ) then if( a==zero ) then if( .not.swtch ) then if( orgati ) then a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi ) else a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi ) end if else a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi end if end if eta = b / a else if( a<=zero ) then eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c ) else eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) ) end if end if end if ! note, eta should be positive if w is negative, and ! eta should be negative otherwise. however, ! if for some reason caused by roundoff, eta*w > 0, ! we simply use one newton step instead. this way ! will guarantee eta*w < 0. if( w*eta>=zero )eta = -w / dw eta = eta / ( sigma+sqrt( sigma*sigma+eta ) ) temp=tau+eta if( temp>sgub .or. temp<sglb ) then if( w<zero ) then eta = ( sgub-tau ) / two else eta = ( sglb-tau ) / two end if if( geomavg ) then if( w < zero ) then if( tau > zero ) then eta = sqrt(sgub*tau)-tau end if else if( sglb > zero ) then eta = sqrt(sglb*tau)-tau end if end if end if end if prew = w tau = tau + eta sigma = sigma + eta do j = 1, n work( j ) = work( j ) + eta delta( j ) = delta( j ) - eta end do ! evaluate psi and the derivative dpsi dpsi = zero psi = zero erretm = zero do j = 1, iim1 temp = z( j ) / ( work( j )*delta( j ) ) psi = psi + z( j )*temp dpsi = dpsi + temp*temp erretm = erretm + psi end do erretm = abs( erretm ) ! evaluate phi and the derivative dphi dphi = zero phi = zero do j = n, iip1, -1 temp = z( j ) / ( work( j )*delta( j ) ) phi = phi + z( j )*temp dphi = dphi + temp*temp erretm = erretm + phi end do tau2 = work( ii )*delta( ii ) temp = z( ii ) / tau2 dw = dpsi + dphi + temp*temp temp = z( ii )*temp w = rhoinv + phi + psi + temp erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp ) ! $ + abs( tau2 )*dw if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch end do loop_230 ! return with info = 1, niter = maxit and not converged info = 1_${ik}$ end if 240 continue return end subroutine stdlib${ii}$_${ri}$lasd4 #:endif #:endfor pure module subroutine stdlib${ii}$_slasd5( i, d, z, delta, rho, dsigma, work ) !! This subroutine computes the square root of the I-th eigenvalue !! of a positive symmetric rank-one modification of a 2-by-2 diagonal !! matrix !! diag( D ) * diag( D ) + RHO * Z * transpose(Z) . !! The diagonal entries in the array D are assumed to satisfy !! 0 <= D(i) < D(j) for i < j . !! We also assume RHO > 0 and that the Euclidean norm of the vector !! Z is one. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: i real(sp), intent(out) :: dsigma real(sp), intent(in) :: rho ! Array Arguments real(sp), intent(in) :: d(2_${ik}$), z(2_${ik}$) real(sp), intent(out) :: delta(2_${ik}$), work(2_${ik}$) ! ===================================================================== ! Local Scalars real(sp) :: b, c, del, delsq, tau, w ! Intrinsic Functions ! Executable Statements del = d( 2_${ik}$ ) - d( 1_${ik}$ ) delsq = del*( d( 2_${ik}$ )+d( 1_${ik}$ ) ) if( i==1_${ik}$ ) then w = one + four*rho*( z( 2_${ik}$ )*z( 2_${ik}$ ) / ( d( 1_${ik}$ )+three*d( 2_${ik}$ ) )-z( 1_${ik}$ )*z( 1_${ik}$ ) / ( & three*d( 1_${ik}$ )+d( 2_${ik}$ ) ) ) / del if( w>zero ) then b = delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 1_${ik}$ )*z( 1_${ik}$ )*delsq ! b > zero, always ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 ) tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) ! the following tau is dsigma - d( 1 ) tau = tau / ( d( 1_${ik}$ )+sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+tau ) ) dsigma = d( 1_${ik}$ ) + tau delta( 1_${ik}$ ) = -tau delta( 2_${ik}$ ) = del - tau work( 1_${ik}$ ) = two*d( 1_${ik}$ ) + tau work( 2_${ik}$ ) = ( d( 1_${ik}$ )+tau ) + d( 2_${ik}$ ) ! delta( 1 ) = -z( 1 ) / tau ! delta( 2 ) = z( 2 ) / ( del-tau ) else b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) if( b>zero ) then tau = -two*c / ( b+sqrt( b*b+four*c ) ) else tau = ( b-sqrt( b*b+four*c ) ) / two end if ! the following tau is dsigma - d( 2 ) tau = tau / ( d( 2_${ik}$ )+sqrt( abs( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) ) dsigma = d( 2_${ik}$ ) + tau delta( 1_${ik}$ ) = -( del+tau ) delta( 2_${ik}$ ) = -tau work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ ) work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau ! delta( 1 ) = -z( 1 ) / ( del+tau ) ! delta( 2 ) = -z( 2 ) / tau end if ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) ! delta( 1 ) = delta( 1 ) / temp ! delta( 2 ) = delta( 2 ) / temp else ! now i=2 b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) if( b>zero ) then tau = ( b+sqrt( b*b+four*c ) ) / two else tau = two*c / ( -b+sqrt( b*b+four*c ) ) end if ! the following tau is dsigma - d( 2 ) tau = tau / ( d( 2_${ik}$ )+sqrt( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) dsigma = d( 2_${ik}$ ) + tau delta( 1_${ik}$ ) = -( del+tau ) delta( 2_${ik}$ ) = -tau work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ ) work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau ! delta( 1 ) = -z( 1 ) / ( del+tau ) ! delta( 2 ) = -z( 2 ) / tau ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) ! delta( 1 ) = delta( 1 ) / temp ! delta( 2 ) = delta( 2 ) / temp end if return end subroutine stdlib${ii}$_slasd5 pure module subroutine stdlib${ii}$_dlasd5( i, d, z, delta, rho, dsigma, work ) !! This subroutine computes the square root of the I-th eigenvalue !! of a positive symmetric rank-one modification of a 2-by-2 diagonal !! matrix !! diag( D ) * diag( D ) + RHO * Z * transpose(Z) . !! The diagonal entries in the array D are assumed to satisfy !! 0 <= D(i) < D(j) for i < j . !! We also assume RHO > 0 and that the Euclidean norm of the vector !! Z is one. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: i real(dp), intent(out) :: dsigma real(dp), intent(in) :: rho ! Array Arguments real(dp), intent(in) :: d(2_${ik}$), z(2_${ik}$) real(dp), intent(out) :: delta(2_${ik}$), work(2_${ik}$) ! ===================================================================== ! Local Scalars real(dp) :: b, c, del, delsq, tau, w ! Intrinsic Functions ! Executable Statements del = d( 2_${ik}$ ) - d( 1_${ik}$ ) delsq = del*( d( 2_${ik}$ )+d( 1_${ik}$ ) ) if( i==1_${ik}$ ) then w = one + four*rho*( z( 2_${ik}$ )*z( 2_${ik}$ ) / ( d( 1_${ik}$ )+three*d( 2_${ik}$ ) )-z( 1_${ik}$ )*z( 1_${ik}$ ) / ( & three*d( 1_${ik}$ )+d( 2_${ik}$ ) ) ) / del if( w>zero ) then b = delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 1_${ik}$ )*z( 1_${ik}$ )*delsq ! b > zero, always ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 ) tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) ! the following tau is dsigma - d( 1 ) tau = tau / ( d( 1_${ik}$ )+sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+tau ) ) dsigma = d( 1_${ik}$ ) + tau delta( 1_${ik}$ ) = -tau delta( 2_${ik}$ ) = del - tau work( 1_${ik}$ ) = two*d( 1_${ik}$ ) + tau work( 2_${ik}$ ) = ( d( 1_${ik}$ )+tau ) + d( 2_${ik}$ ) ! delta( 1 ) = -z( 1 ) / tau ! delta( 2 ) = z( 2 ) / ( del-tau ) else b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) if( b>zero ) then tau = -two*c / ( b+sqrt( b*b+four*c ) ) else tau = ( b-sqrt( b*b+four*c ) ) / two end if ! the following tau is dsigma - d( 2 ) tau = tau / ( d( 2_${ik}$ )+sqrt( abs( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) ) dsigma = d( 2_${ik}$ ) + tau delta( 1_${ik}$ ) = -( del+tau ) delta( 2_${ik}$ ) = -tau work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ ) work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau ! delta( 1 ) = -z( 1 ) / ( del+tau ) ! delta( 2 ) = -z( 2 ) / tau end if ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) ! delta( 1 ) = delta( 1 ) / temp ! delta( 2 ) = delta( 2 ) / temp else ! now i=2 b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) if( b>zero ) then tau = ( b+sqrt( b*b+four*c ) ) / two else tau = two*c / ( -b+sqrt( b*b+four*c ) ) end if ! the following tau is dsigma - d( 2 ) tau = tau / ( d( 2_${ik}$ )+sqrt( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) dsigma = d( 2_${ik}$ ) + tau delta( 1_${ik}$ ) = -( del+tau ) delta( 2_${ik}$ ) = -tau work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ ) work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau ! delta( 1 ) = -z( 1 ) / ( del+tau ) ! delta( 2 ) = -z( 2 ) / tau ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) ! delta( 1 ) = delta( 1 ) / temp ! delta( 2 ) = delta( 2 ) / temp end if return end subroutine stdlib${ii}$_dlasd5 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd5( i, d, z, delta, rho, dsigma, work ) !! This subroutine computes the square root of the I-th eigenvalue !! of a positive symmetric rank-one modification of a 2-by-2 diagonal !! matrix !! diag( D ) * diag( D ) + RHO * Z * transpose(Z) . !! The diagonal entries in the array D are assumed to satisfy !! 0 <= D(i) < D(j) for i < j . !! We also assume RHO > 0 and that the Euclidean norm of the vector !! Z is one. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: i real(${rk}$), intent(out) :: dsigma real(${rk}$), intent(in) :: rho ! Array Arguments real(${rk}$), intent(in) :: d(2_${ik}$), z(2_${ik}$) real(${rk}$), intent(out) :: delta(2_${ik}$), work(2_${ik}$) ! ===================================================================== ! Local Scalars real(${rk}$) :: b, c, del, delsq, tau, w ! Intrinsic Functions ! Executable Statements del = d( 2_${ik}$ ) - d( 1_${ik}$ ) delsq = del*( d( 2_${ik}$ )+d( 1_${ik}$ ) ) if( i==1_${ik}$ ) then w = one + four*rho*( z( 2_${ik}$ )*z( 2_${ik}$ ) / ( d( 1_${ik}$ )+three*d( 2_${ik}$ ) )-z( 1_${ik}$ )*z( 1_${ik}$ ) / ( & three*d( 1_${ik}$ )+d( 2_${ik}$ ) ) ) / del if( w>zero ) then b = delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 1_${ik}$ )*z( 1_${ik}$ )*delsq ! b > zero, always ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 ) tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) ) ! the following tau is dsigma - d( 1 ) tau = tau / ( d( 1_${ik}$ )+sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+tau ) ) dsigma = d( 1_${ik}$ ) + tau delta( 1_${ik}$ ) = -tau delta( 2_${ik}$ ) = del - tau work( 1_${ik}$ ) = two*d( 1_${ik}$ ) + tau work( 2_${ik}$ ) = ( d( 1_${ik}$ )+tau ) + d( 2_${ik}$ ) ! delta( 1 ) = -z( 1 ) / tau ! delta( 2 ) = z( 2 ) / ( del-tau ) else b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) if( b>zero ) then tau = -two*c / ( b+sqrt( b*b+four*c ) ) else tau = ( b-sqrt( b*b+four*c ) ) / two end if ! the following tau is dsigma - d( 2 ) tau = tau / ( d( 2_${ik}$ )+sqrt( abs( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) ) dsigma = d( 2_${ik}$ ) + tau delta( 1_${ik}$ ) = -( del+tau ) delta( 2_${ik}$ ) = -tau work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ ) work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau ! delta( 1 ) = -z( 1 ) / ( del+tau ) ! delta( 2 ) = -z( 2 ) / tau end if ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) ! delta( 1 ) = delta( 1 ) / temp ! delta( 2 ) = delta( 2 ) / temp else ! now i=2 b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) ) c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 ) if( b>zero ) then tau = ( b+sqrt( b*b+four*c ) ) / two else tau = two*c / ( -b+sqrt( b*b+four*c ) ) end if ! the following tau is dsigma - d( 2 ) tau = tau / ( d( 2_${ik}$ )+sqrt( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) dsigma = d( 2_${ik}$ ) + tau delta( 1_${ik}$ ) = -( del+tau ) delta( 2_${ik}$ ) = -tau work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ ) work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau ! delta( 1 ) = -z( 1 ) / ( del+tau ) ! delta( 2 ) = -z( 2 ) / tau ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) ) ! delta( 1 ) = delta( 1 ) / temp ! delta( 2 ) = delta( 2 ) / temp end if return end subroutine stdlib${ii}$_${ri}$lasd5 #:endif #:endfor pure module subroutine stdlib${ii}$_slasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, & !! SLASDQ computes the singular value decomposition (SVD) of a real !! (upper or lower) bidiagonal matrix with diagonal D and offdiagonal !! E, accumulating the transformations if desired. Letting B denote !! the input bidiagonal matrix, the algorithm computes orthogonal !! matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose !! of P). The singular values S are overwritten on D. !! The input matrix U is changed to U * Q if desired. !! The input matrix VT is changed to P**T * VT if desired. !! The input matrix C is changed to Q**T * C if desired. !! See "Computing Small Singular Values of Bidiagonal Matrices With !! Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, !! LAPACK Working Note #3, for a detailed description of the algorithm. work, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: uplo integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre ! Array Arguments real(sp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: rotate integer(${ik}$) :: i, isub, iuplo, j, np1, sqre1 real(sp) :: cs, r, smin, sn ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ iuplo = 0_${ik}$ if( stdlib_lsame( uplo, 'U' ) )iuplo = 1_${ik}$ if( stdlib_lsame( uplo, 'L' ) )iuplo = 2_${ik}$ if( iuplo==0_${ik}$ ) then info = -1_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncvt<0_${ik}$ ) then info = -4_${ik}$ else if( nru<0_${ik}$ ) then info = -5_${ik}$ else if( ncc<0_${ik}$ ) then info = -6_${ik}$ else if( ( ncvt==0_${ik}$ .and. ldvt<1_${ik}$ ) .or.( ncvt>0_${ik}$ .and. ldvt<max( 1_${ik}$, n ) ) ) then info = -10_${ik}$ else if( ldu<max( 1_${ik}$, nru ) ) then info = -12_${ik}$ else if( ( ncc==0_${ik}$ .and. ldc<1_${ik}$ ) .or.( ncc>0_${ik}$ .and. ldc<max( 1_${ik}$, n ) ) ) then info = -14_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASDQ', -info ) return end if if( n==0 )return ! rotate is true if any singular vectors desired, false otherwise rotate = ( ncvt>0_${ik}$ ) .or. ( nru>0_${ik}$ ) .or. ( ncc>0_${ik}$ ) np1 = n + 1_${ik}$ sqre1 = sqre ! if matrix non-square upper bidiagonal, rotate to be lower ! bidiagonal. the rotations are on the right. if( ( iuplo==1_${ik}$ ) .and. ( sqre1==1_${ik}$ ) ) then do i = 1, n - 1 call stdlib${ii}$_slartg( d( i ), e( i ), cs, sn, r ) d( i ) = r e( i ) = sn*d( i+1 ) d( i+1 ) = cs*d( i+1 ) if( rotate ) then work( i ) = cs work( n+i ) = sn end if end do call stdlib${ii}$_slartg( d( n ), e( n ), cs, sn, r ) d( n ) = r e( n ) = zero if( rotate ) then work( n ) = cs work( n+n ) = sn end if iuplo = 2_${ik}$ sqre1 = 0_${ik}$ ! update singular vectors if desired. if( ncvt>0_${ik}$ )call stdlib${ii}$_slasr( 'L', 'V', 'F', np1, ncvt, work( 1_${ik}$ ),work( np1 ), vt, & ldvt ) end if ! if matrix lower bidiagonal, rotate to be upper bidiagonal ! by applying givens rotations on the left. if( iuplo==2_${ik}$ ) then do i = 1, n - 1 call stdlib${ii}$_slartg( d( i ), e( i ), cs, sn, r ) d( i ) = r e( i ) = sn*d( i+1 ) d( i+1 ) = cs*d( i+1 ) if( rotate ) then work( i ) = cs work( n+i ) = sn end if end do ! if matrix (n+1)-by-n lower bidiagonal, one additional ! rotation is needed. if( sqre1==1_${ik}$ ) then call stdlib${ii}$_slartg( d( n ), e( n ), cs, sn, r ) d( n ) = r if( rotate ) then work( n ) = cs work( n+n ) = sn end if end if ! update singular vectors if desired. if( nru>0_${ik}$ ) then if( sqre1==0_${ik}$ ) then call stdlib${ii}$_slasr( 'R', 'V', 'F', nru, n, work( 1_${ik}$ ),work( np1 ), u, ldu ) else call stdlib${ii}$_slasr( 'R', 'V', 'F', nru, np1, work( 1_${ik}$ ),work( np1 ), u, ldu ) end if end if if( ncc>0_${ik}$ ) then if( sqre1==0_${ik}$ ) then call stdlib${ii}$_slasr( 'L', 'V', 'F', n, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc ) else call stdlib${ii}$_slasr( 'L', 'V', 'F', np1, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc ) end if end if end if ! call stdlib${ii}$_sbdsqr to compute the svd of the reduced real ! n-by-n upper bidiagonal matrix. call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info ) ! sort the singular values into ascending order (insertion sort on ! singular values, but only one transposition per singular vector) do i = 1, n ! scan for smallest d(i). isub = i smin = d( i ) do j = i + 1, n if( d( j )<smin ) then isub = j smin = d( j ) end if end do if( isub/=i ) then ! swap singular values and vectors. d( isub ) = d( i ) d( i ) = smin if( ncvt>0_${ik}$ )call stdlib${ii}$_sswap( ncvt, vt( isub, 1_${ik}$ ), ldvt, vt( i, 1_${ik}$ ), ldvt ) if( nru>0_${ik}$ )call stdlib${ii}$_sswap( nru, u( 1_${ik}$, isub ), 1_${ik}$, u( 1_${ik}$, i ), 1_${ik}$ ) if( ncc>0_${ik}$ )call stdlib${ii}$_sswap( ncc, c( isub, 1_${ik}$ ), ldc, c( i, 1_${ik}$ ), ldc ) end if end do return end subroutine stdlib${ii}$_slasdq pure module subroutine stdlib${ii}$_dlasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, & !! DLASDQ computes the singular value decomposition (SVD) of a real !! (upper or lower) bidiagonal matrix with diagonal D and offdiagonal !! E, accumulating the transformations if desired. Letting B denote !! the input bidiagonal matrix, the algorithm computes orthogonal !! matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose !! of P). The singular values S are overwritten on D. !! The input matrix U is changed to U * Q if desired. !! The input matrix VT is changed to P**T * VT if desired. !! The input matrix C is changed to Q**T * C if desired. !! See "Computing Small Singular Values of Bidiagonal Matrices With !! Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, !! LAPACK Working Note #3, for a detailed description of the algorithm. work, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: uplo integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre ! Array Arguments real(dp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: rotate integer(${ik}$) :: i, isub, iuplo, j, np1, sqre1 real(dp) :: cs, r, smin, sn ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ iuplo = 0_${ik}$ if( stdlib_lsame( uplo, 'U' ) )iuplo = 1_${ik}$ if( stdlib_lsame( uplo, 'L' ) )iuplo = 2_${ik}$ if( iuplo==0_${ik}$ ) then info = -1_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncvt<0_${ik}$ ) then info = -4_${ik}$ else if( nru<0_${ik}$ ) then info = -5_${ik}$ else if( ncc<0_${ik}$ ) then info = -6_${ik}$ else if( ( ncvt==0_${ik}$ .and. ldvt<1_${ik}$ ) .or.( ncvt>0_${ik}$ .and. ldvt<max( 1_${ik}$, n ) ) ) then info = -10_${ik}$ else if( ldu<max( 1_${ik}$, nru ) ) then info = -12_${ik}$ else if( ( ncc==0_${ik}$ .and. ldc<1_${ik}$ ) .or.( ncc>0_${ik}$ .and. ldc<max( 1_${ik}$, n ) ) ) then info = -14_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASDQ', -info ) return end if if( n==0 )return ! rotate is true if any singular vectors desired, false otherwise rotate = ( ncvt>0_${ik}$ ) .or. ( nru>0_${ik}$ ) .or. ( ncc>0_${ik}$ ) np1 = n + 1_${ik}$ sqre1 = sqre ! if matrix non-square upper bidiagonal, rotate to be lower ! bidiagonal. the rotations are on the right. if( ( iuplo==1_${ik}$ ) .and. ( sqre1==1_${ik}$ ) ) then do i = 1, n - 1 call stdlib${ii}$_dlartg( d( i ), e( i ), cs, sn, r ) d( i ) = r e( i ) = sn*d( i+1 ) d( i+1 ) = cs*d( i+1 ) if( rotate ) then work( i ) = cs work( n+i ) = sn end if end do call stdlib${ii}$_dlartg( d( n ), e( n ), cs, sn, r ) d( n ) = r e( n ) = zero if( rotate ) then work( n ) = cs work( n+n ) = sn end if iuplo = 2_${ik}$ sqre1 = 0_${ik}$ ! update singular vectors if desired. if( ncvt>0_${ik}$ )call stdlib${ii}$_dlasr( 'L', 'V', 'F', np1, ncvt, work( 1_${ik}$ ),work( np1 ), vt, & ldvt ) end if ! if matrix lower bidiagonal, rotate to be upper bidiagonal ! by applying givens rotations on the left. if( iuplo==2_${ik}$ ) then do i = 1, n - 1 call stdlib${ii}$_dlartg( d( i ), e( i ), cs, sn, r ) d( i ) = r e( i ) = sn*d( i+1 ) d( i+1 ) = cs*d( i+1 ) if( rotate ) then work( i ) = cs work( n+i ) = sn end if end do ! if matrix (n+1)-by-n lower bidiagonal, one additional ! rotation is needed. if( sqre1==1_${ik}$ ) then call stdlib${ii}$_dlartg( d( n ), e( n ), cs, sn, r ) d( n ) = r if( rotate ) then work( n ) = cs work( n+n ) = sn end if end if ! update singular vectors if desired. if( nru>0_${ik}$ ) then if( sqre1==0_${ik}$ ) then call stdlib${ii}$_dlasr( 'R', 'V', 'F', nru, n, work( 1_${ik}$ ),work( np1 ), u, ldu ) else call stdlib${ii}$_dlasr( 'R', 'V', 'F', nru, np1, work( 1_${ik}$ ),work( np1 ), u, ldu ) end if end if if( ncc>0_${ik}$ ) then if( sqre1==0_${ik}$ ) then call stdlib${ii}$_dlasr( 'L', 'V', 'F', n, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc ) else call stdlib${ii}$_dlasr( 'L', 'V', 'F', np1, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc ) end if end if end if ! call stdlib${ii}$_dbdsqr to compute the svd of the reduced real ! n-by-n upper bidiagonal matrix. call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info ) ! sort the singular values into ascending order (insertion sort on ! singular values, but only one transposition per singular vector) do i = 1, n ! scan for smallest d(i). isub = i smin = d( i ) do j = i + 1, n if( d( j )<smin ) then isub = j smin = d( j ) end if end do if( isub/=i ) then ! swap singular values and vectors. d( isub ) = d( i ) d( i ) = smin if( ncvt>0_${ik}$ )call stdlib${ii}$_dswap( ncvt, vt( isub, 1_${ik}$ ), ldvt, vt( i, 1_${ik}$ ), ldvt ) if( nru>0_${ik}$ )call stdlib${ii}$_dswap( nru, u( 1_${ik}$, isub ), 1_${ik}$, u( 1_${ik}$, i ), 1_${ik}$ ) if( ncc>0_${ik}$ )call stdlib${ii}$_dswap( ncc, c( isub, 1_${ik}$ ), ldc, c( i, 1_${ik}$ ), ldc ) end if end do return end subroutine stdlib${ii}$_dlasdq #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, & !! DLASDQ: computes the singular value decomposition (SVD) of a real !! (upper or lower) bidiagonal matrix with diagonal D and offdiagonal !! E, accumulating the transformations if desired. Letting B denote !! the input bidiagonal matrix, the algorithm computes orthogonal !! matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose !! of P). The singular values S are overwritten on D. !! The input matrix U is changed to U * Q if desired. !! The input matrix VT is changed to P**T * VT if desired. !! The input matrix C is changed to Q**T * C if desired. !! See "Computing Small Singular Values of Bidiagonal Matrices With !! Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, !! LAPACK Working Note #3, for a detailed description of the algorithm. work, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: uplo integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre ! Array Arguments real(${rk}$), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: rotate integer(${ik}$) :: i, isub, iuplo, j, np1, sqre1 real(${rk}$) :: cs, r, smin, sn ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ iuplo = 0_${ik}$ if( stdlib_lsame( uplo, 'U' ) )iuplo = 1_${ik}$ if( stdlib_lsame( uplo, 'L' ) )iuplo = 2_${ik}$ if( iuplo==0_${ik}$ ) then info = -1_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncvt<0_${ik}$ ) then info = -4_${ik}$ else if( nru<0_${ik}$ ) then info = -5_${ik}$ else if( ncc<0_${ik}$ ) then info = -6_${ik}$ else if( ( ncvt==0_${ik}$ .and. ldvt<1_${ik}$ ) .or.( ncvt>0_${ik}$ .and. ldvt<max( 1_${ik}$, n ) ) ) then info = -10_${ik}$ else if( ldu<max( 1_${ik}$, nru ) ) then info = -12_${ik}$ else if( ( ncc==0_${ik}$ .and. ldc<1_${ik}$ ) .or.( ncc>0_${ik}$ .and. ldc<max( 1_${ik}$, n ) ) ) then info = -14_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASDQ', -info ) return end if if( n==0 )return ! rotate is true if any singular vectors desired, false otherwise rotate = ( ncvt>0_${ik}$ ) .or. ( nru>0_${ik}$ ) .or. ( ncc>0_${ik}$ ) np1 = n + 1_${ik}$ sqre1 = sqre ! if matrix non-square upper bidiagonal, rotate to be lower ! bidiagonal. the rotations are on the right. if( ( iuplo==1_${ik}$ ) .and. ( sqre1==1_${ik}$ ) ) then do i = 1, n - 1 call stdlib${ii}$_${ri}$lartg( d( i ), e( i ), cs, sn, r ) d( i ) = r e( i ) = sn*d( i+1 ) d( i+1 ) = cs*d( i+1 ) if( rotate ) then work( i ) = cs work( n+i ) = sn end if end do call stdlib${ii}$_${ri}$lartg( d( n ), e( n ), cs, sn, r ) d( n ) = r e( n ) = zero if( rotate ) then work( n ) = cs work( n+n ) = sn end if iuplo = 2_${ik}$ sqre1 = 0_${ik}$ ! update singular vectors if desired. if( ncvt>0_${ik}$ )call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', np1, ncvt, work( 1_${ik}$ ),work( np1 ), vt, & ldvt ) end if ! if matrix lower bidiagonal, rotate to be upper bidiagonal ! by applying givens rotations on the left. if( iuplo==2_${ik}$ ) then do i = 1, n - 1 call stdlib${ii}$_${ri}$lartg( d( i ), e( i ), cs, sn, r ) d( i ) = r e( i ) = sn*d( i+1 ) d( i+1 ) = cs*d( i+1 ) if( rotate ) then work( i ) = cs work( n+i ) = sn end if end do ! if matrix (n+1)-by-n lower bidiagonal, one additional ! rotation is needed. if( sqre1==1_${ik}$ ) then call stdlib${ii}$_${ri}$lartg( d( n ), e( n ), cs, sn, r ) d( n ) = r if( rotate ) then work( n ) = cs work( n+n ) = sn end if end if ! update singular vectors if desired. if( nru>0_${ik}$ ) then if( sqre1==0_${ik}$ ) then call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', nru, n, work( 1_${ik}$ ),work( np1 ), u, ldu ) else call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', nru, np1, work( 1_${ik}$ ),work( np1 ), u, ldu ) end if end if if( ncc>0_${ik}$ ) then if( sqre1==0_${ik}$ ) then call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', n, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc ) else call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', np1, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc ) end if end if end if ! call stdlib${ii}$_${ri}$bdsqr to compute the svd of the reduced real ! n-by-n upper bidiagonal matrix. call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info ) ! sort the singular values into ascending order (insertion sort on ! singular values, but only one transposition per singular vector) do i = 1, n ! scan for smallest d(i). isub = i smin = d( i ) do j = i + 1, n if( d( j )<smin ) then isub = j smin = d( j ) end if end do if( isub/=i ) then ! swap singular values and vectors. d( isub ) = d( i ) d( i ) = smin if( ncvt>0_${ik}$ )call stdlib${ii}$_${ri}$swap( ncvt, vt( isub, 1_${ik}$ ), ldvt, vt( i, 1_${ik}$ ), ldvt ) if( nru>0_${ik}$ )call stdlib${ii}$_${ri}$swap( nru, u( 1_${ik}$, isub ), 1_${ik}$, u( 1_${ik}$, i ), 1_${ik}$ ) if( ncc>0_${ik}$ )call stdlib${ii}$_${ri}$swap( ncc, c( isub, 1_${ik}$ ), ldc, c( i, 1_${ik}$ ), ldc ) end if end do return end subroutine stdlib${ii}$_${ri}$lasdq #:endif #:endfor pure module subroutine stdlib${ii}$_slasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, & !! Using a divide and conquer approach, SLASDA: computes the singular !! value decomposition (SVD) of a real upper bidiagonal N-by-M matrix !! B with diagonal D and offdiagonal E, where M = N + SQRE. The !! algorithm computes the singular values in the SVD B = U * S * VT. !! The orthogonal matrices U and VT are optionally computed in !! compact form. !! A related subroutine, SLASD0, computes the singular values and !! the singular vectors in explicit form. poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre integer(${ik}$), intent(out) :: info ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,& *) real(sp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), & s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*) real(sp), intent(inout) :: d(*), e(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, & m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, & nwork2, smlszp, sqrei, vf, vfi, vl, vli real(sp) :: alpha, beta ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( smlsiz<3_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldu<( n+sqre ) ) then info = -8_${ik}$ else if( ldgcol<n ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASDA', -info ) return end if m = n + sqre ! if the input matrix is too small, call stdlib${ii}$_slasdq to find the svd. if( n<=smlsiz ) then if( icompq==0_${ik}$ ) then call stdlib${ii}$_slasdq( 'U', sqre, n, 0_${ik}$, 0_${ik}$, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, & info ) else call stdlib${ii}$_slasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, & info ) end if return end if ! book-keeping and set up the computation tree. inode = 1_${ik}$ ndiml = inode + n ndimr = ndiml + n idxq = ndimr + n iwk = idxq + n ncc = 0_${ik}$ nru = 0_${ik}$ smlszp = smlsiz + 1_${ik}$ vf = 1_${ik}$ vl = vf + m nwork1 = vl + m nwork2 = nwork1 + smlszp*smlszp call stdlib${ii}$_slasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz & ) ! for the nodes on bottom level of the tree, solve ! their subproblems by stdlib${ii}$_slasdq. ndb1 = ( nd+1 ) / 2_${ik}$ loop_30: do i = ndb1, nd ! ic : center row of each node ! nl : number of rows of left subproblem ! nr : number of rows of right subproblem ! nlf: starting row of the left subproblem ! nrf: starting row of the right subproblem i1 = i - 1_${ik}$ ic = iwork( inode+i1 ) nl = iwork( ndiml+i1 ) nlp1 = nl + 1_${ik}$ nr = iwork( ndimr+i1 ) nlf = ic - nl nrf = ic + 1_${ik}$ idxqi = idxq + nlf - 2_${ik}$ vfi = vf + nlf - 1_${ik}$ vli = vl + nlf - 1_${ik}$ sqrei = 1_${ik}$ if( icompq==0_${ik}$ ) then call stdlib${ii}$_slaset( 'A', nlp1, nlp1, zero, one, work( nwork1 ),smlszp ) call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nru, ncc, d( nlf ),e( nlf ), work( & nwork1 ), smlszp,work( nwork2 ), nl, work( nwork2 ), nl,work( nwork2 ), info ) itemp = nwork1 + nl*smlszp call stdlib${ii}$_scopy( nlp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_scopy( nlp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ ) else call stdlib${ii}$_slaset( 'A', nl, nl, zero, one, u( nlf, 1_${ik}$ ), ldu ) call stdlib${ii}$_slaset( 'A', nlp1, nlp1, zero, one, vt( nlf, 1_${ik}$ ), ldu ) call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ),e( nlf ), vt( nlf, 1_${ik}$ & ), ldu, u( nlf, 1_${ik}$ ), ldu,u( nlf, 1_${ik}$ ), ldu, work( nwork1 ), info ) call stdlib${ii}$_scopy( nlp1, vt( nlf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_scopy( nlp1, vt( nlf, nlp1 ), 1_${ik}$, work( vli ), 1_${ik}$ ) end if if( info/=0_${ik}$ ) then return end if do j = 1, nl iwork( idxqi+j ) = j end do if( ( i==nd ) .and. ( sqre==0_${ik}$ ) ) then sqrei = 0_${ik}$ else sqrei = 1_${ik}$ end if idxqi = idxqi + nlp1 vfi = vfi + nlp1 vli = vli + nlp1 nrp1 = nr + sqrei if( icompq==0_${ik}$ ) then call stdlib${ii}$_slaset( 'A', nrp1, nrp1, zero, one, work( nwork1 ),smlszp ) call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nru, ncc, d( nrf ),e( nrf ), work( & nwork1 ), smlszp,work( nwork2 ), nr, work( nwork2 ), nr,work( nwork2 ), info ) itemp = nwork1 + ( nrp1-1 )*smlszp call stdlib${ii}$_scopy( nrp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_scopy( nrp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ ) else call stdlib${ii}$_slaset( 'A', nr, nr, zero, one, u( nrf, 1_${ik}$ ), ldu ) call stdlib${ii}$_slaset( 'A', nrp1, nrp1, zero, one, vt( nrf, 1_${ik}$ ), ldu ) call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ),e( nrf ), vt( nrf, 1_${ik}$ & ), ldu, u( nrf, 1_${ik}$ ), ldu,u( nrf, 1_${ik}$ ), ldu, work( nwork1 ), info ) call stdlib${ii}$_scopy( nrp1, vt( nrf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_scopy( nrp1, vt( nrf, nrp1 ), 1_${ik}$, work( vli ), 1_${ik}$ ) end if if( info/=0_${ik}$ ) then return end if do j = 1, nr iwork( idxqi+j ) = j end do end do loop_30 ! now conquer each subproblem bottom-up. j = 2_${ik}$**nlvl loop_50: do lvl = nlvl, 1, -1 lvl2 = lvl*2_${ik}$ - 1_${ik}$ ! find the first node lf and last node ll on ! the current level lvl. if( lvl==1_${ik}$ ) then lf = 1_${ik}$ ll = 1_${ik}$ else lf = 2_${ik}$**( lvl-1 ) ll = 2_${ik}$*lf - 1_${ik}$ end if loop_40: do i = lf, ll im1 = i - 1_${ik}$ ic = iwork( inode+im1 ) nl = iwork( ndiml+im1 ) nr = iwork( ndimr+im1 ) nlf = ic - nl nrf = ic + 1_${ik}$ if( i==ll ) then sqrei = sqre else sqrei = 1_${ik}$ end if vfi = vf + nlf - 1_${ik}$ vli = vl + nlf - 1_${ik}$ idxqi = idxq + nlf - 1_${ik}$ alpha = d( ic ) beta = e( ic ) if( icompq==0_${ik}$ ) then call stdlib${ii}$_slasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), & alpha, beta,iwork( idxqi ), perm, givptr( 1_${ik}$ ), givcol,ldgcol, givnum, ldu, & poles, difl, difr, z,k( 1_${ik}$ ), c( 1_${ik}$ ), s( 1_${ik}$ ), work( nwork1 ),iwork( iwk ), & info ) else j = j - 1_${ik}$ call stdlib${ii}$_slasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), & alpha, beta,iwork( idxqi ), perm( nlf, lvl ),givptr( j ), givcol( nlf, lvl2 ),& ldgcol,givnum( nlf, lvl2 ), ldu,poles( nlf, lvl2 ), difl( nlf, lvl ),difr( & nlf, lvl2 ), z( nlf, lvl ), k( j ),c( j ), s( j ), work( nwork1 ),iwork( iwk & ), info ) end if if( info/=0_${ik}$ ) then return end if end do loop_40 end do loop_50 return end subroutine stdlib${ii}$_slasda pure module subroutine stdlib${ii}$_dlasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, & !! Using a divide and conquer approach, DLASDA: computes the singular !! value decomposition (SVD) of a real upper bidiagonal N-by-M matrix !! B with diagonal D and offdiagonal E, where M = N + SQRE. The !! algorithm computes the singular values in the SVD B = U * S * VT. !! The orthogonal matrices U and VT are optionally computed in !! compact form. !! A related subroutine, DLASD0, computes the singular values and !! the singular vectors in explicit form. poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre integer(${ik}$), intent(out) :: info ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,& *) real(dp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), & s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*) real(dp), intent(inout) :: d(*), e(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, & m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, & nwork2, smlszp, sqrei, vf, vfi, vl, vli real(dp) :: alpha, beta ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( smlsiz<3_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldu<( n+sqre ) ) then info = -8_${ik}$ else if( ldgcol<n ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASDA', -info ) return end if m = n + sqre ! if the input matrix is too small, call stdlib${ii}$_dlasdq to find the svd. if( n<=smlsiz ) then if( icompq==0_${ik}$ ) then call stdlib${ii}$_dlasdq( 'U', sqre, n, 0_${ik}$, 0_${ik}$, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, & info ) else call stdlib${ii}$_dlasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, & info ) end if return end if ! book-keeping and set up the computation tree. inode = 1_${ik}$ ndiml = inode + n ndimr = ndiml + n idxq = ndimr + n iwk = idxq + n ncc = 0_${ik}$ nru = 0_${ik}$ smlszp = smlsiz + 1_${ik}$ vf = 1_${ik}$ vl = vf + m nwork1 = vl + m nwork2 = nwork1 + smlszp*smlszp call stdlib${ii}$_dlasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz & ) ! for the nodes on bottom level of the tree, solve ! their subproblems by stdlib${ii}$_dlasdq. ndb1 = ( nd+1 ) / 2_${ik}$ loop_30: do i = ndb1, nd ! ic : center row of each node ! nl : number of rows of left subproblem ! nr : number of rows of right subproblem ! nlf: starting row of the left subproblem ! nrf: starting row of the right subproblem i1 = i - 1_${ik}$ ic = iwork( inode+i1 ) nl = iwork( ndiml+i1 ) nlp1 = nl + 1_${ik}$ nr = iwork( ndimr+i1 ) nlf = ic - nl nrf = ic + 1_${ik}$ idxqi = idxq + nlf - 2_${ik}$ vfi = vf + nlf - 1_${ik}$ vli = vl + nlf - 1_${ik}$ sqrei = 1_${ik}$ if( icompq==0_${ik}$ ) then call stdlib${ii}$_dlaset( 'A', nlp1, nlp1, zero, one, work( nwork1 ),smlszp ) call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nru, ncc, d( nlf ),e( nlf ), work( & nwork1 ), smlszp,work( nwork2 ), nl, work( nwork2 ), nl,work( nwork2 ), info ) itemp = nwork1 + nl*smlszp call stdlib${ii}$_dcopy( nlp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_dcopy( nlp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ ) else call stdlib${ii}$_dlaset( 'A', nl, nl, zero, one, u( nlf, 1_${ik}$ ), ldu ) call stdlib${ii}$_dlaset( 'A', nlp1, nlp1, zero, one, vt( nlf, 1_${ik}$ ), ldu ) call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ),e( nlf ), vt( nlf, 1_${ik}$ & ), ldu, u( nlf, 1_${ik}$ ), ldu,u( nlf, 1_${ik}$ ), ldu, work( nwork1 ), info ) call stdlib${ii}$_dcopy( nlp1, vt( nlf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_dcopy( nlp1, vt( nlf, nlp1 ), 1_${ik}$, work( vli ), 1_${ik}$ ) end if if( info/=0_${ik}$ ) then return end if do j = 1, nl iwork( idxqi+j ) = j end do if( ( i==nd ) .and. ( sqre==0_${ik}$ ) ) then sqrei = 0_${ik}$ else sqrei = 1_${ik}$ end if idxqi = idxqi + nlp1 vfi = vfi + nlp1 vli = vli + nlp1 nrp1 = nr + sqrei if( icompq==0_${ik}$ ) then call stdlib${ii}$_dlaset( 'A', nrp1, nrp1, zero, one, work( nwork1 ),smlszp ) call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nru, ncc, d( nrf ),e( nrf ), work( & nwork1 ), smlszp,work( nwork2 ), nr, work( nwork2 ), nr,work( nwork2 ), info ) itemp = nwork1 + ( nrp1-1 )*smlszp call stdlib${ii}$_dcopy( nrp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_dcopy( nrp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ ) else call stdlib${ii}$_dlaset( 'A', nr, nr, zero, one, u( nrf, 1_${ik}$ ), ldu ) call stdlib${ii}$_dlaset( 'A', nrp1, nrp1, zero, one, vt( nrf, 1_${ik}$ ), ldu ) call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ),e( nrf ), vt( nrf, 1_${ik}$ & ), ldu, u( nrf, 1_${ik}$ ), ldu,u( nrf, 1_${ik}$ ), ldu, work( nwork1 ), info ) call stdlib${ii}$_dcopy( nrp1, vt( nrf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_dcopy( nrp1, vt( nrf, nrp1 ), 1_${ik}$, work( vli ), 1_${ik}$ ) end if if( info/=0_${ik}$ ) then return end if do j = 1, nr iwork( idxqi+j ) = j end do end do loop_30 ! now conquer each subproblem bottom-up. j = 2_${ik}$**nlvl loop_50: do lvl = nlvl, 1, -1 lvl2 = lvl*2_${ik}$ - 1_${ik}$ ! find the first node lf and last node ll on ! the current level lvl. if( lvl==1_${ik}$ ) then lf = 1_${ik}$ ll = 1_${ik}$ else lf = 2_${ik}$**( lvl-1 ) ll = 2_${ik}$*lf - 1_${ik}$ end if loop_40: do i = lf, ll im1 = i - 1_${ik}$ ic = iwork( inode+im1 ) nl = iwork( ndiml+im1 ) nr = iwork( ndimr+im1 ) nlf = ic - nl nrf = ic + 1_${ik}$ if( i==ll ) then sqrei = sqre else sqrei = 1_${ik}$ end if vfi = vf + nlf - 1_${ik}$ vli = vl + nlf - 1_${ik}$ idxqi = idxq + nlf - 1_${ik}$ alpha = d( ic ) beta = e( ic ) if( icompq==0_${ik}$ ) then call stdlib${ii}$_dlasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), & alpha, beta,iwork( idxqi ), perm, givptr( 1_${ik}$ ), givcol,ldgcol, givnum, ldu, & poles, difl, difr, z,k( 1_${ik}$ ), c( 1_${ik}$ ), s( 1_${ik}$ ), work( nwork1 ),iwork( iwk ), & info ) else j = j - 1_${ik}$ call stdlib${ii}$_dlasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), & alpha, beta,iwork( idxqi ), perm( nlf, lvl ),givptr( j ), givcol( nlf, lvl2 ),& ldgcol,givnum( nlf, lvl2 ), ldu,poles( nlf, lvl2 ), difl( nlf, lvl ),difr( & nlf, lvl2 ), z( nlf, lvl ), k( j ),c( j ), s( j ), work( nwork1 ),iwork( iwk & ), info ) end if if( info/=0_${ik}$ ) then return end if end do loop_40 end do loop_50 return end subroutine stdlib${ii}$_dlasda #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, & !! Using a divide and conquer approach, DLASDA: computes the singular !! value decomposition (SVD) of a real upper bidiagonal N-by-M matrix !! B with diagonal D and offdiagonal E, where M = N + SQRE. The !! algorithm computes the singular values in the SVD B = U * S * VT. !! The orthogonal matrices U and VT are optionally computed in !! compact form. !! A related subroutine, DLASD0, computes the singular values and !! the singular vectors in explicit form. poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre integer(${ik}$), intent(out) :: info ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,& *) real(${rk}$), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), & s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*) real(${rk}$), intent(inout) :: d(*), e(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, & m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, & nwork2, smlszp, sqrei, vf, vfi, vl, vli real(${rk}$) :: alpha, beta ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( smlsiz<3_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldu<( n+sqre ) ) then info = -8_${ik}$ else if( ldgcol<n ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASDA', -info ) return end if m = n + sqre ! if the input matrix is too small, call stdlib${ii}$_${ri}$lasdq to find the svd. if( n<=smlsiz ) then if( icompq==0_${ik}$ ) then call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, 0_${ik}$, 0_${ik}$, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, & info ) else call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, & info ) end if return end if ! book-keeping and set up the computation tree. inode = 1_${ik}$ ndiml = inode + n ndimr = ndiml + n idxq = ndimr + n iwk = idxq + n ncc = 0_${ik}$ nru = 0_${ik}$ smlszp = smlsiz + 1_${ik}$ vf = 1_${ik}$ vl = vf + m nwork1 = vl + m nwork2 = nwork1 + smlszp*smlszp call stdlib${ii}$_${ri}$lasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz & ) ! for the nodes on bottom level of the tree, solve ! their subproblems by stdlib${ii}$_${ri}$lasdq. ndb1 = ( nd+1 ) / 2_${ik}$ loop_30: do i = ndb1, nd ! ic : center row of each node ! nl : number of rows of left subproblem ! nr : number of rows of right subproblem ! nlf: starting row of the left subproblem ! nrf: starting row of the right subproblem i1 = i - 1_${ik}$ ic = iwork( inode+i1 ) nl = iwork( ndiml+i1 ) nlp1 = nl + 1_${ik}$ nr = iwork( ndimr+i1 ) nlf = ic - nl nrf = ic + 1_${ik}$ idxqi = idxq + nlf - 2_${ik}$ vfi = vf + nlf - 1_${ik}$ vli = vl + nlf - 1_${ik}$ sqrei = 1_${ik}$ if( icompq==0_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'A', nlp1, nlp1, zero, one, work( nwork1 ),smlszp ) call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nru, ncc, d( nlf ),e( nlf ), work( & nwork1 ), smlszp,work( nwork2 ), nl, work( nwork2 ), nl,work( nwork2 ), info ) itemp = nwork1 + nl*smlszp call stdlib${ii}$_${ri}$copy( nlp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( nlp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ ) else call stdlib${ii}$_${ri}$laset( 'A', nl, nl, zero, one, u( nlf, 1_${ik}$ ), ldu ) call stdlib${ii}$_${ri}$laset( 'A', nlp1, nlp1, zero, one, vt( nlf, 1_${ik}$ ), ldu ) call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ),e( nlf ), vt( nlf, 1_${ik}$ & ), ldu, u( nlf, 1_${ik}$ ), ldu,u( nlf, 1_${ik}$ ), ldu, work( nwork1 ), info ) call stdlib${ii}$_${ri}$copy( nlp1, vt( nlf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( nlp1, vt( nlf, nlp1 ), 1_${ik}$, work( vli ), 1_${ik}$ ) end if if( info/=0_${ik}$ ) then return end if do j = 1, nl iwork( idxqi+j ) = j end do if( ( i==nd ) .and. ( sqre==0_${ik}$ ) ) then sqrei = 0_${ik}$ else sqrei = 1_${ik}$ end if idxqi = idxqi + nlp1 vfi = vfi + nlp1 vli = vli + nlp1 nrp1 = nr + sqrei if( icompq==0_${ik}$ ) then call stdlib${ii}$_${ri}$laset( 'A', nrp1, nrp1, zero, one, work( nwork1 ),smlszp ) call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nru, ncc, d( nrf ),e( nrf ), work( & nwork1 ), smlszp,work( nwork2 ), nr, work( nwork2 ), nr,work( nwork2 ), info ) itemp = nwork1 + ( nrp1-1 )*smlszp call stdlib${ii}$_${ri}$copy( nrp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( nrp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ ) else call stdlib${ii}$_${ri}$laset( 'A', nr, nr, zero, one, u( nrf, 1_${ik}$ ), ldu ) call stdlib${ii}$_${ri}$laset( 'A', nrp1, nrp1, zero, one, vt( nrf, 1_${ik}$ ), ldu ) call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ),e( nrf ), vt( nrf, 1_${ik}$ & ), ldu, u( nrf, 1_${ik}$ ), ldu,u( nrf, 1_${ik}$ ), ldu, work( nwork1 ), info ) call stdlib${ii}$_${ri}$copy( nrp1, vt( nrf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( nrp1, vt( nrf, nrp1 ), 1_${ik}$, work( vli ), 1_${ik}$ ) end if if( info/=0_${ik}$ ) then return end if do j = 1, nr iwork( idxqi+j ) = j end do end do loop_30 ! now conquer each subproblem bottom-up. j = 2_${ik}$**nlvl loop_50: do lvl = nlvl, 1, -1 lvl2 = lvl*2_${ik}$ - 1_${ik}$ ! find the first node lf and last node ll on ! the current level lvl. if( lvl==1_${ik}$ ) then lf = 1_${ik}$ ll = 1_${ik}$ else lf = 2_${ik}$**( lvl-1 ) ll = 2_${ik}$*lf - 1_${ik}$ end if loop_40: do i = lf, ll im1 = i - 1_${ik}$ ic = iwork( inode+im1 ) nl = iwork( ndiml+im1 ) nr = iwork( ndimr+im1 ) nlf = ic - nl nrf = ic + 1_${ik}$ if( i==ll ) then sqrei = sqre else sqrei = 1_${ik}$ end if vfi = vf + nlf - 1_${ik}$ vli = vl + nlf - 1_${ik}$ idxqi = idxq + nlf - 1_${ik}$ alpha = d( ic ) beta = e( ic ) if( icompq==0_${ik}$ ) then call stdlib${ii}$_${ri}$lasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), & alpha, beta,iwork( idxqi ), perm, givptr( 1_${ik}$ ), givcol,ldgcol, givnum, ldu, & poles, difl, difr, z,k( 1_${ik}$ ), c( 1_${ik}$ ), s( 1_${ik}$ ), work( nwork1 ),iwork( iwk ), & info ) else j = j - 1_${ik}$ call stdlib${ii}$_${ri}$lasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), & alpha, beta,iwork( idxqi ), perm( nlf, lvl ),givptr( j ), givcol( nlf, lvl2 ),& ldgcol,givnum( nlf, lvl2 ), ldu,poles( nlf, lvl2 ), difl( nlf, lvl ),difr( & nlf, lvl2 ), z( nlf, lvl ), k( j ),c( j ), s( j ), work( nwork1 ),iwork( iwk & ), info ) end if if( info/=0_${ik}$ ) then return end if end do loop_40 end do loop_50 return end subroutine stdlib${ii}$_${ri}$lasda #:endif #:endfor pure module subroutine stdlib${ii}$_slasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & !! SLASD6 computes the SVD of an updated upper bidiagonal matrix B !! obtained by merging two smaller ones by appending a row. This !! routine is used only for the problem which requires all singular !! values and optionally singular vector matrices in factored form. !! B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. !! A related subroutine, SLASD1, handles the case in which all singular !! values and singular vectors of the bidiagonal matrix are desired. !! SLASD6 computes the SVD as follows: !! ( D1(in) 0 0 0 ) !! B = U(in) * ( Z1**T a Z2**T b ) * VT(in) !! ( 0 0 D2(in) 0 ) !! = U(out) * ( D(out) 0) * VT(out) !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros !! elsewhere; and the entry b is empty if SQRE = 0. !! The singular values of B can be computed using D1, D2, the first !! components of all the right singular vectors of the lower block, and !! the last components of all the right singular vectors of the upper !! block. These components are stored and updated in VF and VL, !! respectively, in SLASD6. Hence U and VT are not explicitly !! referenced. !! The singular values are stored in D. The algorithm consists of two !! stages: !! The first stage consists of deflating the size of the problem !! when there are multiple singular values or if there is a zero !! in the Z vector. For each such occurrence the dimension of the !! secular equation problem is reduced by one. This stage is !! performed by the routine SLASD7. !! The second stage consists of calculating the updated !! singular values. This is done by finding the roots of the !! secular equation via the routine SLASD4 (as called by SLASD8). !! This routine also updates VF and VL and computes the distances !! between the updated singular values and the old singular !! values. !! SLASD6 is called from SLASDA. givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: givptr, info, k integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre real(sp), intent(inout) :: alpha, beta real(sp), intent(out) :: c, s ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*) integer(${ik}$), intent(inout) :: idxq(*) real(sp), intent(inout) :: d(*), vf(*), vl(*) real(sp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), & z(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2 real(sp) :: orgnrm ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ n = nl + nr + 1_${ik}$ m = n + sqre if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( nl<1_${ik}$ ) then info = -2_${ik}$ else if( nr<1_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldgcol<n ) then info = -14_${ik}$ else if( ldgnum<n ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD6', -info ) return end if ! the following values are for bookkeeping purposes only. they are ! integer pointers which indicate the portion of the workspace ! used by a particular array in stdlib${ii}$_slasd7 and stdlib${ii}$_slasd8. isigma = 1_${ik}$ iw = isigma + n ivfw = iw + m ivlw = ivfw + m idx = 1_${ik}$ idxc = idx + n idxp = idxc + n ! scale. orgnrm = max( abs( alpha ), abs( beta ) ) d( nl+1 ) = zero do i = 1, n if( abs( d( i ) )>orgnrm ) then orgnrm = abs( d( i ) ) end if end do call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info ) alpha = alpha / orgnrm beta = beta / orgnrm ! sort and deflate singular values. call stdlib${ii}$_slasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, & work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, & givptr, givcol, ldgcol, givnum, ldgnum, c, s,info ) ! solve secular equation, compute difl, difr, and update vf, vl. call stdlib${ii}$_slasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( & iw ), info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if ! save the poles if icompq = 1. if( icompq==1_${ik}$ ) then call stdlib${ii}$_scopy( k, d, 1_${ik}$, poles( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_scopy( k, work( isigma ), 1_${ik}$, poles( 1_${ik}$, 2_${ik}$ ), 1_${ik}$ ) end if ! unscale. call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info ) ! prepare the idxq sorting permutation. n1 = k n2 = n - k call stdlib${ii}$_slamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq ) return end subroutine stdlib${ii}$_slasd6 pure module subroutine stdlib${ii}$_dlasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & !! DLASD6 computes the SVD of an updated upper bidiagonal matrix B !! obtained by merging two smaller ones by appending a row. This !! routine is used only for the problem which requires all singular !! values and optionally singular vector matrices in factored form. !! B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. !! A related subroutine, DLASD1, handles the case in which all singular !! values and singular vectors of the bidiagonal matrix are desired. !! DLASD6 computes the SVD as follows: !! ( D1(in) 0 0 0 ) !! B = U(in) * ( Z1**T a Z2**T b ) * VT(in) !! ( 0 0 D2(in) 0 ) !! = U(out) * ( D(out) 0) * VT(out) !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros !! elsewhere; and the entry b is empty if SQRE = 0. !! The singular values of B can be computed using D1, D2, the first !! components of all the right singular vectors of the lower block, and !! the last components of all the right singular vectors of the upper !! block. These components are stored and updated in VF and VL, !! respectively, in DLASD6. Hence U and VT are not explicitly !! referenced. !! The singular values are stored in D. The algorithm consists of two !! stages: !! The first stage consists of deflating the size of the problem !! when there are multiple singular values or if there is a zero !! in the Z vector. For each such occurrence the dimension of the !! secular equation problem is reduced by one. This stage is !! performed by the routine DLASD7. !! The second stage consists of calculating the updated !! singular values. This is done by finding the roots of the !! secular equation via the routine DLASD4 (as called by DLASD8). !! This routine also updates VF and VL and computes the distances !! between the updated singular values and the old singular !! values. !! DLASD6 is called from DLASDA. givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: givptr, info, k integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre real(dp), intent(inout) :: alpha, beta real(dp), intent(out) :: c, s ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*) integer(${ik}$), intent(inout) :: idxq(*) real(dp), intent(inout) :: d(*), vf(*), vl(*) real(dp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), & z(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2 real(dp) :: orgnrm ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ n = nl + nr + 1_${ik}$ m = n + sqre if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( nl<1_${ik}$ ) then info = -2_${ik}$ else if( nr<1_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldgcol<n ) then info = -14_${ik}$ else if( ldgnum<n ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD6', -info ) return end if ! the following values are for bookkeeping purposes only. they are ! integer pointers which indicate the portion of the workspace ! used by a particular array in stdlib${ii}$_dlasd7 and stdlib${ii}$_dlasd8. isigma = 1_${ik}$ iw = isigma + n ivfw = iw + m ivlw = ivfw + m idx = 1_${ik}$ idxc = idx + n idxp = idxc + n ! scale. orgnrm = max( abs( alpha ), abs( beta ) ) d( nl+1 ) = zero do i = 1, n if( abs( d( i ) )>orgnrm ) then orgnrm = abs( d( i ) ) end if end do call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info ) alpha = alpha / orgnrm beta = beta / orgnrm ! sort and deflate singular values. call stdlib${ii}$_dlasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, & work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, & givptr, givcol, ldgcol, givnum, ldgnum, c, s,info ) ! solve secular equation, compute difl, difr, and update vf, vl. call stdlib${ii}$_dlasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( & iw ), info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if ! save the poles if icompq = 1. if( icompq==1_${ik}$ ) then call stdlib${ii}$_dcopy( k, d, 1_${ik}$, poles( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_dcopy( k, work( isigma ), 1_${ik}$, poles( 1_${ik}$, 2_${ik}$ ), 1_${ik}$ ) end if ! unscale. call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info ) ! prepare the idxq sorting permutation. n1 = k n2 = n - k call stdlib${ii}$_dlamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq ) return end subroutine stdlib${ii}$_dlasd6 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, & !! DLASD6: computes the SVD of an updated upper bidiagonal matrix B !! obtained by merging two smaller ones by appending a row. This !! routine is used only for the problem which requires all singular !! values and optionally singular vector matrices in factored form. !! B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. !! A related subroutine, DLASD1, handles the case in which all singular !! values and singular vectors of the bidiagonal matrix are desired. !! DLASD6 computes the SVD as follows: !! ( D1(in) 0 0 0 ) !! B = U(in) * ( Z1**T a Z2**T b ) * VT(in) !! ( 0 0 D2(in) 0 ) !! = U(out) * ( D(out) 0) * VT(out) !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros !! elsewhere; and the entry b is empty if SQRE = 0. !! The singular values of B can be computed using D1, D2, the first !! components of all the right singular vectors of the lower block, and !! the last components of all the right singular vectors of the upper !! block. These components are stored and updated in VF and VL, !! respectively, in DLASD6. Hence U and VT are not explicitly !! referenced. !! The singular values are stored in D. The algorithm consists of two !! stages: !! The first stage consists of deflating the size of the problem !! when there are multiple singular values or if there is a zero !! in the Z vector. For each such occurrence the dimension of the !! secular equation problem is reduced by one. This stage is !! performed by the routine DLASD7. !! The second stage consists of calculating the updated !! singular values. This is done by finding the roots of the !! secular equation via the routine DLASD4 (as called by DLASD8). !! This routine also updates VF and VL and computes the distances !! between the updated singular values and the old singular !! values. !! DLASD6 is called from DLASDA. givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: givptr, info, k integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre real(${rk}$), intent(inout) :: alpha, beta real(${rk}$), intent(out) :: c, s ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*) integer(${ik}$), intent(inout) :: idxq(*) real(${rk}$), intent(inout) :: d(*), vf(*), vl(*) real(${rk}$), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), & z(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2 real(${rk}$) :: orgnrm ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ n = nl + nr + 1_${ik}$ m = n + sqre if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( nl<1_${ik}$ ) then info = -2_${ik}$ else if( nr<1_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldgcol<n ) then info = -14_${ik}$ else if( ldgnum<n ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD6', -info ) return end if ! the following values are for bookkeeping purposes only. they are ! integer pointers which indicate the portion of the workspace ! used by a particular array in stdlib${ii}$_${ri}$lasd7 and stdlib${ii}$_${ri}$lasd8. isigma = 1_${ik}$ iw = isigma + n ivfw = iw + m ivlw = ivfw + m idx = 1_${ik}$ idxc = idx + n idxp = idxc + n ! scale. orgnrm = max( abs( alpha ), abs( beta ) ) d( nl+1 ) = zero do i = 1, n if( abs( d( i ) )>orgnrm ) then orgnrm = abs( d( i ) ) end if end do call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info ) alpha = alpha / orgnrm beta = beta / orgnrm ! sort and deflate singular values. call stdlib${ii}$_${ri}$lasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, & work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, & givptr, givcol, ldgcol, givnum, ldgnum, c, s,info ) ! solve secular equation, compute difl, difr, and update vf, vl. call stdlib${ii}$_${ri}$lasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( & iw ), info ) ! report the possible convergence failure. if( info/=0_${ik}$ ) then return end if ! save the poles if icompq = 1. if( icompq==1_${ik}$ ) then call stdlib${ii}$_${ri}$copy( k, d, 1_${ik}$, poles( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( k, work( isigma ), 1_${ik}$, poles( 1_${ik}$, 2_${ik}$ ), 1_${ik}$ ) end if ! unscale. call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info ) ! prepare the idxq sorting permutation. n1 = k n2 = n - k call stdlib${ii}$_${ri}$lamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq ) return end subroutine stdlib${ii}$_${ri}$lasd6 #:endif #:endfor pure module subroutine stdlib${ii}$_slasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & !! SLASD7 merges the two sets of singular values together into a single !! sorted set. Then it tries to deflate the size of the problem. There !! are two ways in which deflation can occur: when two or more singular !! values are close together or if there is a tiny entry in the Z !! vector. For each such occurrence the order of the related !! secular equation problem is reduced by one. !! SLASD7 is called from SLASD6. beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: givptr, info, k integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre real(sp), intent(in) :: alpha, beta real(sp), intent(out) :: c, s ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*) integer(${ik}$), intent(inout) :: idxq(*) real(sp), intent(inout) :: d(*), vf(*), vl(*) real(sp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 real(sp) :: eps, hlftol, tau, tol, z1 ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ n = nl + nr + 1_${ik}$ m = n + sqre if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( nl<1_${ik}$ ) then info = -2_${ik}$ else if( nr<1_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldgcol<n ) then info = -22_${ik}$ else if( ldgnum<n ) then info = -24_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD7', -info ) return end if nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ if( icompq==1_${ik}$ ) then givptr = 0_${ik}$ end if ! generate the first part of the vector z and move the singular ! values in the first part of d one position backward. z1 = alpha*vl( nlp1 ) vl( nlp1 ) = zero tau = vf( nlp1 ) do i = nl, 1, -1 z( i+1 ) = alpha*vl( i ) vl( i ) = zero vf( i+1 ) = vf( i ) d( i+1 ) = d( i ) idxq( i+1 ) = idxq( i ) + 1_${ik}$ end do vf( 1_${ik}$ ) = tau ! generate the second part of the vector z. do i = nlp2, m z( i ) = beta*vf( i ) vf( i ) = zero end do ! sort the singular values into increasing order do i = nlp2, n idxq( i ) = idxq( i ) + nlp1 end do ! dsigma, idxc, idxc, and zw are used as storage space. do i = 2, n dsigma( i ) = d( idxq( i ) ) zw( i ) = z( idxq( i ) ) vfw( i ) = vf( idxq( i ) ) vlw( i ) = vl( idxq( i ) ) end do call stdlib${ii}$_slamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) ) do i = 2, n idxi = 1_${ik}$ + idx( i ) d( i ) = dsigma( idxi ) z( i ) = zw( idxi ) vf( i ) = vfw( idxi ) vl( i ) = vlw( idxi ) end do ! calculate the allowable deflation tolerance eps = stdlib${ii}$_slamch( 'EPSILON' ) tol = max( abs( alpha ), abs( beta ) ) tol = eight*eight*eps*max( abs( d( n ) ), tol ) ! there are 2 kinds of deflation -- first a value in the z-vector ! is small, second two (or more) singular values are very close ! together (their difference is small). ! if the value in the z-vector is small, we simply permute the ! array so that the corresponding singular value is moved to the ! end. ! if two values in the d-vector are close, we perform a two-sided ! rotation designed to make one of the corresponding z-vector ! entries zero, and then permute the array so that the deflated ! singular value is moved to the end. ! if there are multiple singular values then the problem deflates. ! here the number of equal singular values are found. as each equal ! singular value is found, an elementary reflector is computed to ! rotate the corresponding singular subspace so that the ! corresponding components of z are zero in this new basis. k = 1_${ik}$ k2 = n + 1_${ik}$ do j = 2, n if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j if( j==n )go to 100 else jprev = j go to 70 end if end do 70 continue j = jprev 80 continue j = j + 1_${ik}$ if( j>n )go to 90 if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j else ! check if singular values are close enough to allow deflation. if( abs( d( j )-d( jprev ) )<=tol ) then ! deflation is possible. s = z( jprev ) c = z( j ) ! find sqrt(a**2+b**2) without overflow or ! destructive underflow. tau = stdlib${ii}$_slapy2( c, s ) z( j ) = tau z( jprev ) = zero c = c / tau s = -s / tau ! record the appropriate givens rotation if( icompq==1_${ik}$ ) then givptr = givptr + 1_${ik}$ idxjp = idxq( idx( jprev )+1_${ik}$ ) idxj = idxq( idx( j )+1_${ik}$ ) if( idxjp<=nlp1 ) then idxjp = idxjp - 1_${ik}$ end if if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if givcol( givptr, 2_${ik}$ ) = idxjp givcol( givptr, 1_${ik}$ ) = idxj givnum( givptr, 2_${ik}$ ) = c givnum( givptr, 1_${ik}$ ) = s end if call stdlib${ii}$_srot( 1_${ik}$, vf( jprev ), 1_${ik}$, vf( j ), 1_${ik}$, c, s ) call stdlib${ii}$_srot( 1_${ik}$, vl( jprev ), 1_${ik}$, vl( j ), 1_${ik}$, c, s ) k2 = k2 - 1_${ik}$ idxp( k2 ) = jprev jprev = j else k = k + 1_${ik}$ zw( k ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev jprev = j end if end if go to 80 90 continue ! record the last singular value. k = k + 1_${ik}$ zw( k ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev 100 continue ! sort the singular values into dsigma. the singular values which ! were not deflated go into the first k slots of dsigma, except ! that dsigma(1) is treated separately. do j = 2, n jp = idxp( j ) dsigma( j ) = d( jp ) vfw( j ) = vf( jp ) vlw( j ) = vl( jp ) end do if( icompq==1_${ik}$ ) then do j = 2, n jp = idxp( j ) perm( j ) = idxq( idx( jp )+1_${ik}$ ) if( perm( j )<=nlp1 ) then perm( j ) = perm( j ) - 1_${ik}$ end if end do end if ! the deflated singular values go back into the last n - k slots of ! d. call stdlib${ii}$_scopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ ) ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and ! vl(m). dsigma( 1_${ik}$ ) = zero hlftol = tol / two if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol if( m>n ) then z( 1_${ik}$ ) = stdlib${ii}$_slapy2( z1, z( m ) ) if( z( 1_${ik}$ )<=tol ) then c = one s = zero z( 1_${ik}$ ) = tol else c = z1 / z( 1_${ik}$ ) s = -z( m ) / z( 1_${ik}$ ) end if call stdlib${ii}$_srot( 1_${ik}$, vf( m ), 1_${ik}$, vf( 1_${ik}$ ), 1_${ik}$, c, s ) call stdlib${ii}$_srot( 1_${ik}$, vl( m ), 1_${ik}$, vl( 1_${ik}$ ), 1_${ik}$, c, s ) else if( abs( z1 )<=tol ) then z( 1_${ik}$ ) = tol else z( 1_${ik}$ ) = z1 end if end if ! restore z, vf, and vl. call stdlib${ii}$_scopy( k-1, zw( 2_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_scopy( n-1, vfw( 2_${ik}$ ), 1_${ik}$, vf( 2_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_scopy( n-1, vlw( 2_${ik}$ ), 1_${ik}$, vl( 2_${ik}$ ), 1_${ik}$ ) return end subroutine stdlib${ii}$_slasd7 pure module subroutine stdlib${ii}$_dlasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & !! DLASD7 merges the two sets of singular values together into a single !! sorted set. Then it tries to deflate the size of the problem. There !! are two ways in which deflation can occur: when two or more singular !! values are close together or if there is a tiny entry in the Z !! vector. For each such occurrence the order of the related !! secular equation problem is reduced by one. !! DLASD7 is called from DLASD6. beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: givptr, info, k integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre real(dp), intent(in) :: alpha, beta real(dp), intent(out) :: c, s ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*) integer(${ik}$), intent(inout) :: idxq(*) real(dp), intent(inout) :: d(*), vf(*), vl(*) real(dp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 real(dp) :: eps, hlftol, tau, tol, z1 ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ n = nl + nr + 1_${ik}$ m = n + sqre if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( nl<1_${ik}$ ) then info = -2_${ik}$ else if( nr<1_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldgcol<n ) then info = -22_${ik}$ else if( ldgnum<n ) then info = -24_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD7', -info ) return end if nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ if( icompq==1_${ik}$ ) then givptr = 0_${ik}$ end if ! generate the first part of the vector z and move the singular ! values in the first part of d one position backward. z1 = alpha*vl( nlp1 ) vl( nlp1 ) = zero tau = vf( nlp1 ) do i = nl, 1, -1 z( i+1 ) = alpha*vl( i ) vl( i ) = zero vf( i+1 ) = vf( i ) d( i+1 ) = d( i ) idxq( i+1 ) = idxq( i ) + 1_${ik}$ end do vf( 1_${ik}$ ) = tau ! generate the second part of the vector z. do i = nlp2, m z( i ) = beta*vf( i ) vf( i ) = zero end do ! sort the singular values into increasing order do i = nlp2, n idxq( i ) = idxq( i ) + nlp1 end do ! dsigma, idxc, idxc, and zw are used as storage space. do i = 2, n dsigma( i ) = d( idxq( i ) ) zw( i ) = z( idxq( i ) ) vfw( i ) = vf( idxq( i ) ) vlw( i ) = vl( idxq( i ) ) end do call stdlib${ii}$_dlamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) ) do i = 2, n idxi = 1_${ik}$ + idx( i ) d( i ) = dsigma( idxi ) z( i ) = zw( idxi ) vf( i ) = vfw( idxi ) vl( i ) = vlw( idxi ) end do ! calculate the allowable deflation tolerance eps = stdlib${ii}$_dlamch( 'EPSILON' ) tol = max( abs( alpha ), abs( beta ) ) tol = eight*eight*eps*max( abs( d( n ) ), tol ) ! there are 2 kinds of deflation -- first a value in the z-vector ! is small, second two (or more) singular values are very close ! together (their difference is small). ! if the value in the z-vector is small, we simply permute the ! array so that the corresponding singular value is moved to the ! end. ! if two values in the d-vector are close, we perform a two-sided ! rotation designed to make one of the corresponding z-vector ! entries zero, and then permute the array so that the deflated ! singular value is moved to the end. ! if there are multiple singular values then the problem deflates. ! here the number of equal singular values are found. as each equal ! singular value is found, an elementary reflector is computed to ! rotate the corresponding singular subspace so that the ! corresponding components of z are zero in this new basis. k = 1_${ik}$ k2 = n + 1_${ik}$ do j = 2, n if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j if( j==n )go to 100 else jprev = j go to 70 end if end do 70 continue j = jprev 80 continue j = j + 1_${ik}$ if( j>n )go to 90 if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j else ! check if singular values are close enough to allow deflation. if( abs( d( j )-d( jprev ) )<=tol ) then ! deflation is possible. s = z( jprev ) c = z( j ) ! find sqrt(a**2+b**2) without overflow or ! destructive underflow. tau = stdlib${ii}$_dlapy2( c, s ) z( j ) = tau z( jprev ) = zero c = c / tau s = -s / tau ! record the appropriate givens rotation if( icompq==1_${ik}$ ) then givptr = givptr + 1_${ik}$ idxjp = idxq( idx( jprev )+1_${ik}$ ) idxj = idxq( idx( j )+1_${ik}$ ) if( idxjp<=nlp1 ) then idxjp = idxjp - 1_${ik}$ end if if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if givcol( givptr, 2_${ik}$ ) = idxjp givcol( givptr, 1_${ik}$ ) = idxj givnum( givptr, 2_${ik}$ ) = c givnum( givptr, 1_${ik}$ ) = s end if call stdlib${ii}$_drot( 1_${ik}$, vf( jprev ), 1_${ik}$, vf( j ), 1_${ik}$, c, s ) call stdlib${ii}$_drot( 1_${ik}$, vl( jprev ), 1_${ik}$, vl( j ), 1_${ik}$, c, s ) k2 = k2 - 1_${ik}$ idxp( k2 ) = jprev jprev = j else k = k + 1_${ik}$ zw( k ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev jprev = j end if end if go to 80 90 continue ! record the last singular value. k = k + 1_${ik}$ zw( k ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev 100 continue ! sort the singular values into dsigma. the singular values which ! were not deflated go into the first k slots of dsigma, except ! that dsigma(1) is treated separately. do j = 2, n jp = idxp( j ) dsigma( j ) = d( jp ) vfw( j ) = vf( jp ) vlw( j ) = vl( jp ) end do if( icompq==1_${ik}$ ) then do j = 2, n jp = idxp( j ) perm( j ) = idxq( idx( jp )+1_${ik}$ ) if( perm( j )<=nlp1 ) then perm( j ) = perm( j ) - 1_${ik}$ end if end do end if ! the deflated singular values go back into the last n - k slots of ! d. call stdlib${ii}$_dcopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ ) ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and ! vl(m). dsigma( 1_${ik}$ ) = zero hlftol = tol / two if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol if( m>n ) then z( 1_${ik}$ ) = stdlib${ii}$_dlapy2( z1, z( m ) ) if( z( 1_${ik}$ )<=tol ) then c = one s = zero z( 1_${ik}$ ) = tol else c = z1 / z( 1_${ik}$ ) s = -z( m ) / z( 1_${ik}$ ) end if call stdlib${ii}$_drot( 1_${ik}$, vf( m ), 1_${ik}$, vf( 1_${ik}$ ), 1_${ik}$, c, s ) call stdlib${ii}$_drot( 1_${ik}$, vl( m ), 1_${ik}$, vl( 1_${ik}$ ), 1_${ik}$, c, s ) else if( abs( z1 )<=tol ) then z( 1_${ik}$ ) = tol else z( 1_${ik}$ ) = z1 end if end if ! restore z, vf, and vl. call stdlib${ii}$_dcopy( k-1, zw( 2_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_dcopy( n-1, vfw( 2_${ik}$ ), 1_${ik}$, vf( 2_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_dcopy( n-1, vlw( 2_${ik}$ ), 1_${ik}$, vl( 2_${ik}$ ), 1_${ik}$ ) return end subroutine stdlib${ii}$_dlasd7 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, & !! DLASD7: merges the two sets of singular values together into a single !! sorted set. Then it tries to deflate the size of the problem. There !! are two ways in which deflation can occur: when two or more singular !! values are close together or if there is a tiny entry in the Z !! vector. For each such occurrence the order of the related !! secular equation problem is reduced by one. !! DLASD7 is called from DLASD6. beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: givptr, info, k integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre real(${rk}$), intent(in) :: alpha, beta real(${rk}$), intent(out) :: c, s ! Array Arguments integer(${ik}$), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*) integer(${ik}$), intent(inout) :: idxq(*) real(${rk}$), intent(inout) :: d(*), vf(*), vl(*) real(${rk}$), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2 real(${rk}$) :: eps, hlftol, tau, tol, z1 ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ n = nl + nr + 1_${ik}$ m = n + sqre if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( nl<1_${ik}$ ) then info = -2_${ik}$ else if( nr<1_${ik}$ ) then info = -3_${ik}$ else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then info = -4_${ik}$ else if( ldgcol<n ) then info = -22_${ik}$ else if( ldgnum<n ) then info = -24_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD7', -info ) return end if nlp1 = nl + 1_${ik}$ nlp2 = nl + 2_${ik}$ if( icompq==1_${ik}$ ) then givptr = 0_${ik}$ end if ! generate the first part of the vector z and move the singular ! values in the first part of d one position backward. z1 = alpha*vl( nlp1 ) vl( nlp1 ) = zero tau = vf( nlp1 ) do i = nl, 1, -1 z( i+1 ) = alpha*vl( i ) vl( i ) = zero vf( i+1 ) = vf( i ) d( i+1 ) = d( i ) idxq( i+1 ) = idxq( i ) + 1_${ik}$ end do vf( 1_${ik}$ ) = tau ! generate the second part of the vector z. do i = nlp2, m z( i ) = beta*vf( i ) vf( i ) = zero end do ! sort the singular values into increasing order do i = nlp2, n idxq( i ) = idxq( i ) + nlp1 end do ! dsigma, idxc, idxc, and zw are used as storage space. do i = 2, n dsigma( i ) = d( idxq( i ) ) zw( i ) = z( idxq( i ) ) vfw( i ) = vf( idxq( i ) ) vlw( i ) = vl( idxq( i ) ) end do call stdlib${ii}$_${ri}$lamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) ) do i = 2, n idxi = 1_${ik}$ + idx( i ) d( i ) = dsigma( idxi ) z( i ) = zw( idxi ) vf( i ) = vfw( idxi ) vl( i ) = vlw( idxi ) end do ! calculate the allowable deflation tolerance eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' ) tol = max( abs( alpha ), abs( beta ) ) tol = eight*eight*eps*max( abs( d( n ) ), tol ) ! there are 2 kinds of deflation -- first a value in the z-vector ! is small, second two (or more) singular values are very close ! together (their difference is small). ! if the value in the z-vector is small, we simply permute the ! array so that the corresponding singular value is moved to the ! end. ! if two values in the d-vector are close, we perform a two-sided ! rotation designed to make one of the corresponding z-vector ! entries zero, and then permute the array so that the deflated ! singular value is moved to the end. ! if there are multiple singular values then the problem deflates. ! here the number of equal singular values are found. as each equal ! singular value is found, an elementary reflector is computed to ! rotate the corresponding singular subspace so that the ! corresponding components of z are zero in this new basis. k = 1_${ik}$ k2 = n + 1_${ik}$ do j = 2, n if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j if( j==n )go to 100 else jprev = j go to 70 end if end do 70 continue j = jprev 80 continue j = j + 1_${ik}$ if( j>n )go to 90 if( abs( z( j ) )<=tol ) then ! deflate due to small z component. k2 = k2 - 1_${ik}$ idxp( k2 ) = j else ! check if singular values are close enough to allow deflation. if( abs( d( j )-d( jprev ) )<=tol ) then ! deflation is possible. s = z( jprev ) c = z( j ) ! find sqrt(a**2+b**2) without overflow or ! destructive underflow. tau = stdlib${ii}$_${ri}$lapy2( c, s ) z( j ) = tau z( jprev ) = zero c = c / tau s = -s / tau ! record the appropriate givens rotation if( icompq==1_${ik}$ ) then givptr = givptr + 1_${ik}$ idxjp = idxq( idx( jprev )+1_${ik}$ ) idxj = idxq( idx( j )+1_${ik}$ ) if( idxjp<=nlp1 ) then idxjp = idxjp - 1_${ik}$ end if if( idxj<=nlp1 ) then idxj = idxj - 1_${ik}$ end if givcol( givptr, 2_${ik}$ ) = idxjp givcol( givptr, 1_${ik}$ ) = idxj givnum( givptr, 2_${ik}$ ) = c givnum( givptr, 1_${ik}$ ) = s end if call stdlib${ii}$_${ri}$rot( 1_${ik}$, vf( jprev ), 1_${ik}$, vf( j ), 1_${ik}$, c, s ) call stdlib${ii}$_${ri}$rot( 1_${ik}$, vl( jprev ), 1_${ik}$, vl( j ), 1_${ik}$, c, s ) k2 = k2 - 1_${ik}$ idxp( k2 ) = jprev jprev = j else k = k + 1_${ik}$ zw( k ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev jprev = j end if end if go to 80 90 continue ! record the last singular value. k = k + 1_${ik}$ zw( k ) = z( jprev ) dsigma( k ) = d( jprev ) idxp( k ) = jprev 100 continue ! sort the singular values into dsigma. the singular values which ! were not deflated go into the first k slots of dsigma, except ! that dsigma(1) is treated separately. do j = 2, n jp = idxp( j ) dsigma( j ) = d( jp ) vfw( j ) = vf( jp ) vlw( j ) = vl( jp ) end do if( icompq==1_${ik}$ ) then do j = 2, n jp = idxp( j ) perm( j ) = idxq( idx( jp )+1_${ik}$ ) if( perm( j )<=nlp1 ) then perm( j ) = perm( j ) - 1_${ik}$ end if end do end if ! the deflated singular values go back into the last n - k slots of ! d. call stdlib${ii}$_${ri}$copy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ ) ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and ! vl(m). dsigma( 1_${ik}$ ) = zero hlftol = tol / two if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol if( m>n ) then z( 1_${ik}$ ) = stdlib${ii}$_${ri}$lapy2( z1, z( m ) ) if( z( 1_${ik}$ )<=tol ) then c = one s = zero z( 1_${ik}$ ) = tol else c = z1 / z( 1_${ik}$ ) s = -z( m ) / z( 1_${ik}$ ) end if call stdlib${ii}$_${ri}$rot( 1_${ik}$, vf( m ), 1_${ik}$, vf( 1_${ik}$ ), 1_${ik}$, c, s ) call stdlib${ii}$_${ri}$rot( 1_${ik}$, vl( m ), 1_${ik}$, vl( 1_${ik}$ ), 1_${ik}$, c, s ) else if( abs( z1 )<=tol ) then z( 1_${ik}$ ) = tol else z( 1_${ik}$ ) = z1 end if end if ! restore z, vf, and vl. call stdlib${ii}$_${ri}$copy( k-1, zw( 2_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( n-1, vfw( 2_${ik}$ ), 1_${ik}$, vf( 2_${ik}$ ), 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( n-1, vlw( 2_${ik}$ ), 1_${ik}$, vl( 2_${ik}$ ), 1_${ik}$ ) return end subroutine stdlib${ii}$_${ri}$lasd7 #:endif #:endfor pure module subroutine stdlib${ii}$_slasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & !! SLASD8 finds the square roots of the roots of the secular equation, !! as defined by the values in DSIGMA and Z. It makes the appropriate !! calls to SLASD4, and stores, for each element in D, the distance !! to its two nearest poles (elements in DSIGMA). It also updates !! the arrays VF and VL, the first and last components of all the !! right singular vectors of the original bidiagonal matrix. !! SLASD8 is called from SLASD6. info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: icompq, k, lddifr integer(${ik}$), intent(out) :: info ! Array Arguments real(sp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*) real(sp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j real(sp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( k<1_${ik}$ ) then info = -2_${ik}$ else if( lddifr<k ) then info = -9_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLASD8', -info ) return end if ! quick return if possible if( k==1_${ik}$ ) then d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) ) difl( 1_${ik}$ ) = d( 1_${ik}$ ) if( icompq==1_${ik}$ ) then difl( 2_${ik}$ ) = one difr( 1_${ik}$, 2_${ik}$ ) = one end if return end if ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can ! be computed with high relative accuracy (barring over/underflow). ! this is a problem on machines without a guard digit in ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), ! which on any of these machines zeros out the bottommost ! bit of dsigma(i) if it is 1; this makes the subsequent ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation ! occurs. on binary machines with a guard digit (almost all ! machines) it does not change dsigma(i) at all. on hexadecimal ! and decimal machines with a guard digit, it slightly ! changes the bottommost bits of dsigma(i). it does not account ! for hexadecimal or decimal machines without guard digits ! (we know of none). we use a subroutine call to compute ! 2*dlambda(i) to prevent optimizing compilers from eliminating ! this code. do i = 1, k dsigma( i ) = stdlib${ii}$_slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) end do ! book keeping. iwk1 = 1_${ik}$ iwk2 = iwk1 + k iwk3 = iwk2 + k iwk2i = iwk2 - 1_${ik}$ iwk3i = iwk3 - 1_${ik}$ ! normalize z. rho = stdlib${ii}$_snrm2( k, z, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info ) rho = rho*rho ! initialize work(iwk3). call stdlib${ii}$_slaset( 'A', k, 1_${ik}$, one, one, work( iwk3 ), k ) ! compute the updated singular values, the arrays difl, difr, ! and the updated z. do j = 1, k call stdlib${ii}$_slasd4( k, j, dsigma, z, work( iwk1 ), rho, d( j ),work( iwk2 ), info ) ! if the root finder fails, report the convergence failure. if( info/=0_${ik}$ ) then return end if work( iwk3i+j ) = work( iwk3i+j )*work( j )*work( iwk2i+j ) difl( j ) = -work( j ) difr( j, 1_${ik}$ ) = -work( j+1 ) do i = 1, j - 1 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-& dsigma( j ) ) / ( dsigma( i )+dsigma( j ) ) end do do i = j + 1, k work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-& dsigma( j ) ) / ( dsigma( i )+dsigma( j ) ) end do end do ! compute updated z. do i = 1, k z( i ) = sign( sqrt( abs( work( iwk3i+i ) ) ), z( i ) ) end do ! update vf and vl. do j = 1, k diflj = difl( j ) dj = d( j ) dsigj = -dsigma( j ) if( j<k ) then difrj = -difr( j, 1_${ik}$ ) dsigjp = -dsigma( j+1 ) end if work( j ) = -z( j ) / diflj / ( dsigma( j )+dj ) do i = 1, j - 1 work( i ) = z( i ) / ( stdlib${ii}$_slamc3( dsigma( i ), dsigj )-diflj )/ ( dsigma( i )& +dj ) end do do i = j + 1, k work( i ) = z( i ) / ( stdlib${ii}$_slamc3( dsigma( i ), dsigjp )+difrj )/ ( dsigma( i & )+dj ) end do temp = stdlib${ii}$_snrm2( k, work, 1_${ik}$ ) work( iwk2i+j ) = stdlib${ii}$_sdot( k, work, 1_${ik}$, vf, 1_${ik}$ ) / temp work( iwk3i+j ) = stdlib${ii}$_sdot( k, work, 1_${ik}$, vl, 1_${ik}$ ) / temp if( icompq==1_${ik}$ ) then difr( j, 2_${ik}$ ) = temp end if end do call stdlib${ii}$_scopy( k, work( iwk2 ), 1_${ik}$, vf, 1_${ik}$ ) call stdlib${ii}$_scopy( k, work( iwk3 ), 1_${ik}$, vl, 1_${ik}$ ) return end subroutine stdlib${ii}$_slasd8 pure module subroutine stdlib${ii}$_dlasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & !! DLASD8 finds the square roots of the roots of the secular equation, !! as defined by the values in DSIGMA and Z. It makes the appropriate !! calls to DLASD4, and stores, for each element in D, the distance !! to its two nearest poles (elements in DSIGMA). It also updates !! the arrays VF and VL, the first and last components of all the !! right singular vectors of the original bidiagonal matrix. !! DLASD8 is called from DLASD6. info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: icompq, k, lddifr integer(${ik}$), intent(out) :: info ! Array Arguments real(dp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*) real(dp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j real(dp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( k<1_${ik}$ ) then info = -2_${ik}$ else if( lddifr<k ) then info = -9_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD8', -info ) return end if ! quick return if possible if( k==1_${ik}$ ) then d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) ) difl( 1_${ik}$ ) = d( 1_${ik}$ ) if( icompq==1_${ik}$ ) then difl( 2_${ik}$ ) = one difr( 1_${ik}$, 2_${ik}$ ) = one end if return end if ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can ! be computed with high relative accuracy (barring over/underflow). ! this is a problem on machines without a guard digit in ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), ! which on any of these machines zeros out the bottommost ! bit of dsigma(i) if it is 1; this makes the subsequent ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation ! occurs. on binary machines with a guard digit (almost all ! machines) it does not change dsigma(i) at all. on hexadecimal ! and decimal machines with a guard digit, it slightly ! changes the bottommost bits of dsigma(i). it does not account ! for hexadecimal or decimal machines without guard digits ! (we know of none). we use a subroutine call to compute ! 2*dlambda(i) to prevent optimizing compilers from eliminating ! this code. do i = 1, k dsigma( i ) = stdlib${ii}$_dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) end do ! book keeping. iwk1 = 1_${ik}$ iwk2 = iwk1 + k iwk3 = iwk2 + k iwk2i = iwk2 - 1_${ik}$ iwk3i = iwk3 - 1_${ik}$ ! normalize z. rho = stdlib${ii}$_dnrm2( k, z, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info ) rho = rho*rho ! initialize work(iwk3). call stdlib${ii}$_dlaset( 'A', k, 1_${ik}$, one, one, work( iwk3 ), k ) ! compute the updated singular values, the arrays difl, difr, ! and the updated z. do j = 1, k call stdlib${ii}$_dlasd4( k, j, dsigma, z, work( iwk1 ), rho, d( j ),work( iwk2 ), info ) ! if the root finder fails, report the convergence failure. if( info/=0_${ik}$ ) then return end if work( iwk3i+j ) = work( iwk3i+j )*work( j )*work( iwk2i+j ) difl( j ) = -work( j ) difr( j, 1_${ik}$ ) = -work( j+1 ) do i = 1, j - 1 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-& dsigma( j ) ) / ( dsigma( i )+dsigma( j ) ) end do do i = j + 1, k work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-& dsigma( j ) ) / ( dsigma( i )+dsigma( j ) ) end do end do ! compute updated z. do i = 1, k z( i ) = sign( sqrt( abs( work( iwk3i+i ) ) ), z( i ) ) end do ! update vf and vl. do j = 1, k diflj = difl( j ) dj = d( j ) dsigj = -dsigma( j ) if( j<k ) then difrj = -difr( j, 1_${ik}$ ) dsigjp = -dsigma( j+1 ) end if work( j ) = -z( j ) / diflj / ( dsigma( j )+dj ) do i = 1, j - 1 work( i ) = z( i ) / ( stdlib${ii}$_dlamc3( dsigma( i ), dsigj )-diflj )/ ( dsigma( i )& +dj ) end do do i = j + 1, k work( i ) = z( i ) / ( stdlib${ii}$_dlamc3( dsigma( i ), dsigjp )+difrj )/ ( dsigma( i & )+dj ) end do temp = stdlib${ii}$_dnrm2( k, work, 1_${ik}$ ) work( iwk2i+j ) = stdlib${ii}$_ddot( k, work, 1_${ik}$, vf, 1_${ik}$ ) / temp work( iwk3i+j ) = stdlib${ii}$_ddot( k, work, 1_${ik}$, vl, 1_${ik}$ ) / temp if( icompq==1_${ik}$ ) then difr( j, 2_${ik}$ ) = temp end if end do call stdlib${ii}$_dcopy( k, work( iwk2 ), 1_${ik}$, vf, 1_${ik}$ ) call stdlib${ii}$_dcopy( k, work( iwk3 ), 1_${ik}$, vl, 1_${ik}$ ) return end subroutine stdlib${ii}$_dlasd8 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, & !! DLASD8: finds the square roots of the roots of the secular equation, !! as defined by the values in DSIGMA and Z. It makes the appropriate !! calls to DLASD4, and stores, for each element in D, the distance !! to its two nearest poles (elements in DSIGMA). It also updates !! the arrays VF and VL, the first and last components of all the !! right singular vectors of the original bidiagonal matrix. !! DLASD8 is called from DLASD6. info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: icompq, k, lddifr integer(${ik}$), intent(out) :: info ! Array Arguments real(${rk}$), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*) real(${rk}$), intent(inout) :: dsigma(*), vf(*), vl(*), z(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j real(${rk}$) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp ! Intrinsic Functions ! Executable Statements ! test the input parameters. info = 0_${ik}$ if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then info = -1_${ik}$ else if( k<1_${ik}$ ) then info = -2_${ik}$ else if( lddifr<k ) then info = -9_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLASD8', -info ) return end if ! quick return if possible if( k==1_${ik}$ ) then d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) ) difl( 1_${ik}$ ) = d( 1_${ik}$ ) if( icompq==1_${ik}$ ) then difl( 2_${ik}$ ) = one difr( 1_${ik}$, 2_${ik}$ ) = one end if return end if ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can ! be computed with high relative accuracy (barring over/underflow). ! this is a problem on machines without a guard digit in ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2). ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i), ! which on any of these machines zeros out the bottommost ! bit of dsigma(i) if it is 1; this makes the subsequent ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation ! occurs. on binary machines with a guard digit (almost all ! machines) it does not change dsigma(i) at all. on hexadecimal ! and decimal machines with a guard digit, it slightly ! changes the bottommost bits of dsigma(i). it does not account ! for hexadecimal or decimal machines without guard digits ! (we know of none). we use a subroutine call to compute ! 2*dlambda(i) to prevent optimizing compilers from eliminating ! this code. do i = 1, k dsigma( i ) = stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigma( i ) ) - dsigma( i ) end do ! book keeping. iwk1 = 1_${ik}$ iwk2 = iwk1 + k iwk3 = iwk2 + k iwk2i = iwk2 - 1_${ik}$ iwk3i = iwk3 - 1_${ik}$ ! normalize z. rho = stdlib${ii}$_${ri}$nrm2( k, z, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info ) rho = rho*rho ! initialize work(iwk3). call stdlib${ii}$_${ri}$laset( 'A', k, 1_${ik}$, one, one, work( iwk3 ), k ) ! compute the updated singular values, the arrays difl, difr, ! and the updated z. do j = 1, k call stdlib${ii}$_${ri}$lasd4( k, j, dsigma, z, work( iwk1 ), rho, d( j ),work( iwk2 ), info ) ! if the root finder fails, report the convergence failure. if( info/=0_${ik}$ ) then return end if work( iwk3i+j ) = work( iwk3i+j )*work( j )*work( iwk2i+j ) difl( j ) = -work( j ) difr( j, 1_${ik}$ ) = -work( j+1 ) do i = 1, j - 1 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-& dsigma( j ) ) / ( dsigma( i )+dsigma( j ) ) end do do i = j + 1, k work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-& dsigma( j ) ) / ( dsigma( i )+dsigma( j ) ) end do end do ! compute updated z. do i = 1, k z( i ) = sign( sqrt( abs( work( iwk3i+i ) ) ), z( i ) ) end do ! update vf and vl. do j = 1, k diflj = difl( j ) dj = d( j ) dsigj = -dsigma( j ) if( j<k ) then difrj = -difr( j, 1_${ik}$ ) dsigjp = -dsigma( j+1 ) end if work( j ) = -z( j ) / diflj / ( dsigma( j )+dj ) do i = 1, j - 1 work( i ) = z( i ) / ( stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigj )-diflj )/ ( dsigma( i )& +dj ) end do do i = j + 1, k work( i ) = z( i ) / ( stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigjp )+difrj )/ ( dsigma( i & )+dj ) end do temp = stdlib${ii}$_${ri}$nrm2( k, work, 1_${ik}$ ) work( iwk2i+j ) = stdlib${ii}$_${ri}$dot( k, work, 1_${ik}$, vf, 1_${ik}$ ) / temp work( iwk3i+j ) = stdlib${ii}$_${ri}$dot( k, work, 1_${ik}$, vl, 1_${ik}$ ) / temp if( icompq==1_${ik}$ ) then difr( j, 2_${ik}$ ) = temp end if end do call stdlib${ii}$_${ri}$copy( k, work( iwk2 ), 1_${ik}$, vf, 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( k, work( iwk3 ), 1_${ik}$, vl, 1_${ik}$ ) return end subroutine stdlib${ii}$_${ri}$lasd8 #:endif #:endfor #:endfor end submodule stdlib_lapack_eigv_svd_bidiag_dc